//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

/// createNVPTXISelDag - This pass converts a legalized DAG into an
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

char NVPTXDAGToDAGISel::ID = 0;

INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > 0);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

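  // Dispatch on the opcode: each try* helper attempts a custom selection and
  // returns true on success; otherwise we fall through to the generic
  // TableGen-generated matcher (SelectCode) at the bottom.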
  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;

  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    if (tryTextureIntrinsic(N))
      return;
    break;
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP16(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 immediates in .f16 ops, so we have to
// load them into an .f16 register first.
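// E.g. (f16 fpimm 1.0) is selected as LOAD_CONST_F16, which materializes the
// immediate into a 16-bit register (effectively a mov.b16).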
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
  if (N->getValueType(0) != MVT::f16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
  SDNode *LoadConstF16 =
      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

// Map ISD::CONDCODE value to appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode()
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
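// E.g. when both lanes of a v2f16 value are consumed via
// (extract_vector_elt V, 0) and (extract_vector_elt V, 1), one 32-bit move
// that splits the register pair beats two per-lane extractions.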
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about f16x2 as it's the only real vector type we
  // need to deal with.
  if (Vector.getSimpleValueType() != MVT::v2f16)
    return false;

  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (auto *U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  unsigned Op = NVPTX::SplitF16x2;
  // If the vector has been BITCAST'ed from i32, we can use the original
  // value directly and avoid a register-to-register move.
  SDValue Source = Vector;
  if (Vector->getOpcode() == ISD::BITCAST) {
    Op = NVPTX::SplitI32toF16x2;
    Source = Vector->getOperand(0);
  }
  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
  // into f16,f16 SplitF16x2(V)
  SDNode *ScatterOp =
      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
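  // E.g. a load from a global variable marked 'constant', or through a
  // 'const float *__restrict__' kernel parameter, qualifies and becomes
  // ld.global.nc.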
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<const Value *, 8> Objs;
  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);

  return all_of(Objs, [&](const Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                               : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                               : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

// Helper function template to reduce amount of boilerplate code for
// opcode selection.
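// E.g. pickOpcodeForVT(MVT::f32, LD_i8_avar, ..., LD_f64_avar) yields
// LD_f32_avar; value types with no variant in a family map to std::nullopt.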
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f16,
                unsigned Opcode_f16x2, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
  case MVT::bf16:
    return Opcode_f16;
  case MVT::v2f16:
  case MVT::v2bf16:
    return Opcode_f16x2;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return std::nullopt;
  }
}

static int getLdStRegType(EVT VT) {
  if (VT.isFloatingPoint())
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f16:
    case MVT::bf16:
    case MVT::v2f16:
    case MVT::v2bf16:
      return NVPTX::PTXLdStInstCode::Untyped;
    default:
      return NVPTX::PTXLdStInstCode::Float;
    }
  else
    return NVPTX::PTXLdStInstCode::Unsigned;
}

bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
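  // For now, anything stronger than monotonic is rejected: a plain ld cannot
  // provide acquire or seq_cst semantics.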
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Signed   : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert((LoadedVT == MVT::v2f16 || LoadedVT == MVT::v2bf16) &&
           "Unexpected vector type");
    // v2f16/v2bf16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else
    fromType = getLdStRegType(ScalarVT);

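  // Four addressing variants are tried below, in order: direct address
  // (avar), symbol+immediate (asi), register+immediate (ari), and plain
  // register (areg).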
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(
        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
          NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Signed   : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else
    FromType = getLdStRegType(ScalarVT);

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8f16 is a special case. PTX doesn't have ld.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // load them with ld.v4.b32.
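  // E.g. a v8f16 load arrives here as NVPTXISD::LoadV4 producing four v2f16
  // values; they are retyped as i32 below and emitted as a single untyped
  // ld.v4.b32.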
  if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar,
                               NVPTX::LDV_i32_v4_avar, std::nullopt,
                               NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
                               NVPTX::LDV_f32_v4_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi,
                               NVPTX::LDV_i32_v4_asi, std::nullopt,
                               NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
                               NVPTX::LDV_f32_v4_asi, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
            NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari,
                                 NVPTX::LDV_i32_v4_ari, std::nullopt,
                                 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
                                 NVPTX::LDV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
            NVPTX::LDV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt,
            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
            NVPTX::LDV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  }

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}

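// Select LDG (ld.global.nc) and LDU operations. We get here either directly
// from the nvvm.ldg/ldu intrinsics (scalar form, or the LDGV2/LDUV4-style
// nodes produced for their vector forms), or from ordinary loads that
// canLowerToLDG identified as invariant loads from the global address space.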
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second operand
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // vectors of f16 are loaded/stored as multiples of v2f16 elements.
    if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = MVT::v2f16;
      NumElts /= 2;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
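  // E.g. an i8 LDG is built with an i16 result type here; if the original
  // load was extending, the CVT logic at the bottom of this function widens
  // the result further.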
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Op1, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  }

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the
  // original memory type (in this case, i8) and our types will not match (the
  // node needs to return an i32 in this case). Our LDG/LDU nodes do not
  // support the concept of sign-/zero-extension, so emulate it here by adding
  // an explicit CVT instruction. Ptxas should clean up any redundancies here.

  EVT OrigType = N->getValueType(0);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT && LdNode) {
    // We have an extending load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
    unsigned CvtOpc =
        GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), IsSigned);

    // For each output value, apply the manual sign/zero-extension and make
    // sure all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode = CurDAG->getMachineNode(
          CvtOpc, DL, OrigType, Res,
          CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // Do not support pre/post inc/dec.
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use st.release or insert fences. However, these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement
  // them.
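  // Monotonic stores do pass the check below: they are emitted as .volatile
  // accesses (see "Volatile Setting" further down), which provide the
  // equivalent .relaxed.sys semantics.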
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((StoreVT == MVT::v2f16 || StoreVT == MVT::v2bf16) &&
           "Unexpected vector type");
    // v2f16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, NVPTX::ST_f16_areg_64,
          NVPTX::ST_f16x2_areg_64, NVPTX::ST_f32_areg_64,
          NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = getLdStRegType(ScalarVT);

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8f16 is a special case. PTX doesn't have an st.v8.f16 instruction.
  // Instead, we split the vector into v2f16 chunks and store them with
  // st.v4.b32. The same applies to v8bf16.
  if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }

  StOps.push_back(getI32Imm(IsVolatile, DL));
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
  StOps.push_back(getI32Imm(VecType, DL));
  StOps.push_back(getI32Imm(ToType, DL));
  StOps.push_back(getI32Imm(ToTypeWidth, DL));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      break;
    case NVPTXISD::StoreV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
                               NVPTX::STV_i32_v4_avar, std::nullopt,
                               NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
                               NVPTX::STV_f32_v4_avar, std::nullopt);
      break;
    }
    StOps.push_back(Addr);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      break;
    case NVPTXISD::StoreV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi,
                               NVPTX::STV_i32_v4_asi, std::nullopt,
                               NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
                               NVPTX::STV_f32_v4_asi, std::nullopt);
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (PointerSize == 64 ?
SelectADDRri64(N2.getNode(), N2, Base, Offset) 1993 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 1994 if (PointerSize == 64) { 1995 switch (N->getOpcode()) { 1996 default: 1997 return false; 1998 case NVPTXISD::StoreV2: 1999 Opcode = pickOpcodeForVT( 2000 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64, 2001 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64, 2002 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64, 2003 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64, 2004 NVPTX::STV_f64_v2_ari_64); 2005 break; 2006 case NVPTXISD::StoreV4: 2007 Opcode = pickOpcodeForVT( 2008 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, 2009 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt, 2010 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, 2011 NVPTX::STV_f32_v4_ari_64, std::nullopt); 2012 break; 2013 } 2014 } else { 2015 switch (N->getOpcode()) { 2016 default: 2017 return false; 2018 case NVPTXISD::StoreV2: 2019 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 2020 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, 2021 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, 2022 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari, 2023 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); 2024 break; 2025 case NVPTXISD::StoreV4: 2026 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 2027 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, 2028 NVPTX::STV_i32_v4_ari, std::nullopt, 2029 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, 2030 NVPTX::STV_f32_v4_ari, std::nullopt); 2031 break; 2032 } 2033 } 2034 StOps.push_back(Base); 2035 StOps.push_back(Offset); 2036 } else { 2037 if (PointerSize == 64) { 2038 switch (N->getOpcode()) { 2039 default: 2040 return false; 2041 case NVPTXISD::StoreV2: 2042 Opcode = pickOpcodeForVT( 2043 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64, 2044 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64, 2045 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64, 2046 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64, 2047 NVPTX::STV_f64_v2_areg_64); 2048 break; 2049 case NVPTXISD::StoreV4: 2050 Opcode = pickOpcodeForVT( 2051 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, 2052 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt, 2053 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, 2054 NVPTX::STV_f32_v4_areg_64, std::nullopt); 2055 break; 2056 } 2057 } else { 2058 switch (N->getOpcode()) { 2059 default: 2060 return false; 2061 case NVPTXISD::StoreV2: 2062 Opcode = 2063 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg, 2064 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg, 2065 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg, 2066 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg, 2067 NVPTX::STV_f64_v2_areg); 2068 break; 2069 case NVPTXISD::StoreV4: 2070 Opcode = pickOpcodeForVT( 2071 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, 2072 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt, 2073 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, 2074 NVPTX::STV_f32_v4_areg, std::nullopt); 2075 break; 2076 } 2077 } 2078 StOps.push_back(N2); 2079 } 2080 2081 if (!Opcode) 2082 return false; 2083 2084 StOps.push_back(Chain); 2085 2086 ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps); 2087 2088 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 2089 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef}); 2090 2091 ReplaceNode(N, ST); 2092 return true; 2093 } 2094 2095 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { 2096 SDValue Chain = Node->getOperand(0); 
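  // Note that operand 1 of the LoadParam node is never read here; selection
  // only needs the chain (operand 0), the constant byte offset (operand 2),
  // and the incoming glue (operand 3).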
2097 SDValue Offset = Node->getOperand(2); 2098 SDValue Flag = Node->getOperand(3); 2099 SDLoc DL(Node); 2100 MemSDNode *Mem = cast<MemSDNode>(Node); 2101 2102 unsigned VecSize; 2103 switch (Node->getOpcode()) { 2104 default: 2105 return false; 2106 case NVPTXISD::LoadParam: 2107 VecSize = 1; 2108 break; 2109 case NVPTXISD::LoadParamV2: 2110 VecSize = 2; 2111 break; 2112 case NVPTXISD::LoadParamV4: 2113 VecSize = 4; 2114 break; 2115 } 2116 2117 EVT EltVT = Node->getValueType(0); 2118 EVT MemVT = Mem->getMemoryVT(); 2119 2120 std::optional<unsigned> Opcode; 2121 2122 switch (VecSize) { 2123 default: 2124 return false; 2125 case 1: 2126 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, 2127 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16, 2128 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64, 2129 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2, 2130 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64); 2131 break; 2132 case 2: 2133 Opcode = 2134 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8, 2135 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32, 2136 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16, 2137 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32, 2138 NVPTX::LoadParamMemV2F64); 2139 break; 2140 case 4: 2141 Opcode = pickOpcodeForVT( 2142 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, 2143 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt, 2144 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, 2145 NVPTX::LoadParamMemV4F32, std::nullopt); 2146 break; 2147 } 2148 if (!Opcode) 2149 return false; 2150 2151 SDVTList VTs; 2152 if (VecSize == 1) { 2153 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); 2154 } else if (VecSize == 2) { 2155 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); 2156 } else { 2157 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; 2158 VTs = CurDAG->getVTList(EVTs); 2159 } 2160 2161 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2162 2163 SmallVector<SDValue, 2> Ops; 2164 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2165 Ops.push_back(Chain); 2166 Ops.push_back(Flag); 2167 2168 ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops)); 2169 return true; 2170 } 2171 2172 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { 2173 SDLoc DL(N); 2174 SDValue Chain = N->getOperand(0); 2175 SDValue Offset = N->getOperand(1); 2176 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2177 MemSDNode *Mem = cast<MemSDNode>(N); 2178 2179 // How many elements do we have? 2180 unsigned NumElts = 1; 2181 switch (N->getOpcode()) { 2182 default: 2183 return false; 2184 case NVPTXISD::StoreRetval: 2185 NumElts = 1; 2186 break; 2187 case NVPTXISD::StoreRetvalV2: 2188 NumElts = 2; 2189 break; 2190 case NVPTXISD::StoreRetvalV4: 2191 NumElts = 4; 2192 break; 2193 } 2194 2195 // Build vector of operands 2196 SmallVector<SDValue, 6> Ops; 2197 for (unsigned i = 0; i < NumElts; ++i) 2198 Ops.push_back(N->getOperand(i + 2)); 2199 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2200 Ops.push_back(Chain); 2201 2202 // Determine target opcode 2203 // If we have an i1, use an 8-bit store. The lowering code in 2204 // NVPTXISelLowering will have already emitted an upcast. 
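  // pickOpcodeForVT returns std::nullopt for combinations that have no PTX
  // instruction; e.g. the v4 tables below carry no i64/f64 entries, presumably
  // because a four-element 64-bit access would exceed the 128-bit total that
  // PTX vector accesses allow.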
  std::optional<unsigned> Opcode;
  switch (NumElts) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
                             NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
    break;
  case 2:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
                             NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
    break;
  case 4:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
                             NVPTX::StoreRetvalV4I32, std::nullopt,
                             NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
                             NVPTX::StoreRetvalV4F32, std::nullopt);
    break;
  }
  if (!Opcode)
    return false;

  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Flag = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
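  // Dispatch on the node opcode first: the StoreParamU32/S32 wrappers below
  // reuse the plain StoreParamI32 opcode after inserting an explicit CVT,
  // while every other opcode is picked by element count and memory type,
  // mirroring tryStoreRetval above.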
  std::optional<unsigned> Opcode;
  switch (N->getOpcode()) {
  default:
    switch (NumElts) {
    default:
      return false;
    case 1:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamI8, NVPTX::StoreParamI16,
                               NVPTX::StoreParamI32, NVPTX::StoreParamI64,
                               NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
                               NVPTX::StoreParamF32, NVPTX::StoreParamF64);
      break;
    case 2:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
                               NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
                               NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
      break;
    case 4:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
                               NVPTX::StoreParamV4I32, std::nullopt,
                               NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
                               NVPTX::StoreParamV4F32, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    break;
  // Special case: if we have a sign-extend/zero-extend node, insert the
  // conversion instruction first, and use that as the value operand to
  // the selected StoreParam node.
  case NVPTXISD::StoreParamU32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone =
        CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, MVT::i32,
                                         Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  case NVPTXISD::StoreParamS32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone =
        CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, MVT::i32,
                                         Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
  unsigned Opc = 0;

  switch (N->getOpcode()) {
  default: return false;
  case NVPTXISD::Tex1DFloatS32:
    Opc = NVPTX::TEX_1D_F32_S32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloat:
    Opc = NVPTX::TEX_1D_F32_F32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatLevel:
    Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatGrad:
    Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DS32S32:
    Opc = NVPTX::TEX_1D_S32_S32_RR;
    break;
  case NVPTXISD::Tex1DS32Float:
    Opc = NVPTX::TEX_1D_S32_F32_RR;
    break;
  case NVPTXISD::Tex1DS32FloatLevel:
    Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DS32FloatGrad:
    Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DU32S32:
    Opc = NVPTX::TEX_1D_U32_S32_RR;
    break;
  case NVPTXISD::Tex1DU32Float:
    Opc = NVPTX::TEX_1D_U32_F32_RR;
    break;
  case NVPTXISD::Tex1DU32FloatLevel:
    Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DU32FloatGrad:
    Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatS32:
    Opc =
NVPTX::TEX_1D_ARRAY_F32_S32_RR; 2388 break; 2389 case NVPTXISD::Tex1DArrayFloatFloat: 2390 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR; 2391 break; 2392 case NVPTXISD::Tex1DArrayFloatFloatLevel: 2393 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR; 2394 break; 2395 case NVPTXISD::Tex1DArrayFloatFloatGrad: 2396 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR; 2397 break; 2398 case NVPTXISD::Tex1DArrayS32S32: 2399 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR; 2400 break; 2401 case NVPTXISD::Tex1DArrayS32Float: 2402 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR; 2403 break; 2404 case NVPTXISD::Tex1DArrayS32FloatLevel: 2405 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR; 2406 break; 2407 case NVPTXISD::Tex1DArrayS32FloatGrad: 2408 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR; 2409 break; 2410 case NVPTXISD::Tex1DArrayU32S32: 2411 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR; 2412 break; 2413 case NVPTXISD::Tex1DArrayU32Float: 2414 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR; 2415 break; 2416 case NVPTXISD::Tex1DArrayU32FloatLevel: 2417 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR; 2418 break; 2419 case NVPTXISD::Tex1DArrayU32FloatGrad: 2420 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR; 2421 break; 2422 case NVPTXISD::Tex2DFloatS32: 2423 Opc = NVPTX::TEX_2D_F32_S32_RR; 2424 break; 2425 case NVPTXISD::Tex2DFloatFloat: 2426 Opc = NVPTX::TEX_2D_F32_F32_RR; 2427 break; 2428 case NVPTXISD::Tex2DFloatFloatLevel: 2429 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR; 2430 break; 2431 case NVPTXISD::Tex2DFloatFloatGrad: 2432 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR; 2433 break; 2434 case NVPTXISD::Tex2DS32S32: 2435 Opc = NVPTX::TEX_2D_S32_S32_RR; 2436 break; 2437 case NVPTXISD::Tex2DS32Float: 2438 Opc = NVPTX::TEX_2D_S32_F32_RR; 2439 break; 2440 case NVPTXISD::Tex2DS32FloatLevel: 2441 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR; 2442 break; 2443 case NVPTXISD::Tex2DS32FloatGrad: 2444 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR; 2445 break; 2446 case NVPTXISD::Tex2DU32S32: 2447 Opc = NVPTX::TEX_2D_U32_S32_RR; 2448 break; 2449 case NVPTXISD::Tex2DU32Float: 2450 Opc = NVPTX::TEX_2D_U32_F32_RR; 2451 break; 2452 case NVPTXISD::Tex2DU32FloatLevel: 2453 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR; 2454 break; 2455 case NVPTXISD::Tex2DU32FloatGrad: 2456 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR; 2457 break; 2458 case NVPTXISD::Tex2DArrayFloatS32: 2459 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR; 2460 break; 2461 case NVPTXISD::Tex2DArrayFloatFloat: 2462 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR; 2463 break; 2464 case NVPTXISD::Tex2DArrayFloatFloatLevel: 2465 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR; 2466 break; 2467 case NVPTXISD::Tex2DArrayFloatFloatGrad: 2468 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR; 2469 break; 2470 case NVPTXISD::Tex2DArrayS32S32: 2471 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR; 2472 break; 2473 case NVPTXISD::Tex2DArrayS32Float: 2474 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR; 2475 break; 2476 case NVPTXISD::Tex2DArrayS32FloatLevel: 2477 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR; 2478 break; 2479 case NVPTXISD::Tex2DArrayS32FloatGrad: 2480 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR; 2481 break; 2482 case NVPTXISD::Tex2DArrayU32S32: 2483 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR; 2484 break; 2485 case NVPTXISD::Tex2DArrayU32Float: 2486 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR; 2487 break; 2488 case NVPTXISD::Tex2DArrayU32FloatLevel: 2489 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR; 2490 break; 2491 case NVPTXISD::Tex2DArrayU32FloatGrad: 2492 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR; 2493 break; 2494 case NVPTXISD::Tex3DFloatS32: 2495 Opc = NVPTX::TEX_3D_F32_S32_RR; 2496 break; 2497 case NVPTXISD::Tex3DFloatFloat: 2498 Opc = 
NVPTX::TEX_3D_F32_F32_RR; 2499 break; 2500 case NVPTXISD::Tex3DFloatFloatLevel: 2501 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR; 2502 break; 2503 case NVPTXISD::Tex3DFloatFloatGrad: 2504 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR; 2505 break; 2506 case NVPTXISD::Tex3DS32S32: 2507 Opc = NVPTX::TEX_3D_S32_S32_RR; 2508 break; 2509 case NVPTXISD::Tex3DS32Float: 2510 Opc = NVPTX::TEX_3D_S32_F32_RR; 2511 break; 2512 case NVPTXISD::Tex3DS32FloatLevel: 2513 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR; 2514 break; 2515 case NVPTXISD::Tex3DS32FloatGrad: 2516 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR; 2517 break; 2518 case NVPTXISD::Tex3DU32S32: 2519 Opc = NVPTX::TEX_3D_U32_S32_RR; 2520 break; 2521 case NVPTXISD::Tex3DU32Float: 2522 Opc = NVPTX::TEX_3D_U32_F32_RR; 2523 break; 2524 case NVPTXISD::Tex3DU32FloatLevel: 2525 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR; 2526 break; 2527 case NVPTXISD::Tex3DU32FloatGrad: 2528 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR; 2529 break; 2530 case NVPTXISD::TexCubeFloatFloat: 2531 Opc = NVPTX::TEX_CUBE_F32_F32_RR; 2532 break; 2533 case NVPTXISD::TexCubeFloatFloatLevel: 2534 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR; 2535 break; 2536 case NVPTXISD::TexCubeS32Float: 2537 Opc = NVPTX::TEX_CUBE_S32_F32_RR; 2538 break; 2539 case NVPTXISD::TexCubeS32FloatLevel: 2540 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR; 2541 break; 2542 case NVPTXISD::TexCubeU32Float: 2543 Opc = NVPTX::TEX_CUBE_U32_F32_RR; 2544 break; 2545 case NVPTXISD::TexCubeU32FloatLevel: 2546 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR; 2547 break; 2548 case NVPTXISD::TexCubeArrayFloatFloat: 2549 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR; 2550 break; 2551 case NVPTXISD::TexCubeArrayFloatFloatLevel: 2552 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR; 2553 break; 2554 case NVPTXISD::TexCubeArrayS32Float: 2555 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR; 2556 break; 2557 case NVPTXISD::TexCubeArrayS32FloatLevel: 2558 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR; 2559 break; 2560 case NVPTXISD::TexCubeArrayU32Float: 2561 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR; 2562 break; 2563 case NVPTXISD::TexCubeArrayU32FloatLevel: 2564 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR; 2565 break; 2566 case NVPTXISD::Tld4R2DFloatFloat: 2567 Opc = NVPTX::TLD4_R_2D_F32_F32_RR; 2568 break; 2569 case NVPTXISD::Tld4G2DFloatFloat: 2570 Opc = NVPTX::TLD4_G_2D_F32_F32_RR; 2571 break; 2572 case NVPTXISD::Tld4B2DFloatFloat: 2573 Opc = NVPTX::TLD4_B_2D_F32_F32_RR; 2574 break; 2575 case NVPTXISD::Tld4A2DFloatFloat: 2576 Opc = NVPTX::TLD4_A_2D_F32_F32_RR; 2577 break; 2578 case NVPTXISD::Tld4R2DS64Float: 2579 Opc = NVPTX::TLD4_R_2D_S32_F32_RR; 2580 break; 2581 case NVPTXISD::Tld4G2DS64Float: 2582 Opc = NVPTX::TLD4_G_2D_S32_F32_RR; 2583 break; 2584 case NVPTXISD::Tld4B2DS64Float: 2585 Opc = NVPTX::TLD4_B_2D_S32_F32_RR; 2586 break; 2587 case NVPTXISD::Tld4A2DS64Float: 2588 Opc = NVPTX::TLD4_A_2D_S32_F32_RR; 2589 break; 2590 case NVPTXISD::Tld4R2DU64Float: 2591 Opc = NVPTX::TLD4_R_2D_U32_F32_RR; 2592 break; 2593 case NVPTXISD::Tld4G2DU64Float: 2594 Opc = NVPTX::TLD4_G_2D_U32_F32_RR; 2595 break; 2596 case NVPTXISD::Tld4B2DU64Float: 2597 Opc = NVPTX::TLD4_B_2D_U32_F32_RR; 2598 break; 2599 case NVPTXISD::Tld4A2DU64Float: 2600 Opc = NVPTX::TLD4_A_2D_U32_F32_RR; 2601 break; 2602 case NVPTXISD::TexUnified1DFloatS32: 2603 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R; 2604 break; 2605 case NVPTXISD::TexUnified1DFloatFloat: 2606 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R; 2607 break; 2608 case NVPTXISD::TexUnified1DFloatFloatLevel: 2609 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R; 2610 break; 2611 case 
NVPTXISD::TexUnified1DFloatFloatGrad: 2612 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R; 2613 break; 2614 case NVPTXISD::TexUnified1DS32S32: 2615 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R; 2616 break; 2617 case NVPTXISD::TexUnified1DS32Float: 2618 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R; 2619 break; 2620 case NVPTXISD::TexUnified1DS32FloatLevel: 2621 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R; 2622 break; 2623 case NVPTXISD::TexUnified1DS32FloatGrad: 2624 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R; 2625 break; 2626 case NVPTXISD::TexUnified1DU32S32: 2627 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R; 2628 break; 2629 case NVPTXISD::TexUnified1DU32Float: 2630 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R; 2631 break; 2632 case NVPTXISD::TexUnified1DU32FloatLevel: 2633 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R; 2634 break; 2635 case NVPTXISD::TexUnified1DU32FloatGrad: 2636 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R; 2637 break; 2638 case NVPTXISD::TexUnified1DArrayFloatS32: 2639 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R; 2640 break; 2641 case NVPTXISD::TexUnified1DArrayFloatFloat: 2642 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R; 2643 break; 2644 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 2645 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R; 2646 break; 2647 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 2648 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R; 2649 break; 2650 case NVPTXISD::TexUnified1DArrayS32S32: 2651 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R; 2652 break; 2653 case NVPTXISD::TexUnified1DArrayS32Float: 2654 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R; 2655 break; 2656 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 2657 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R; 2658 break; 2659 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 2660 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R; 2661 break; 2662 case NVPTXISD::TexUnified1DArrayU32S32: 2663 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R; 2664 break; 2665 case NVPTXISD::TexUnified1DArrayU32Float: 2666 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R; 2667 break; 2668 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 2669 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R; 2670 break; 2671 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 2672 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R; 2673 break; 2674 case NVPTXISD::TexUnified2DFloatS32: 2675 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R; 2676 break; 2677 case NVPTXISD::TexUnified2DFloatFloat: 2678 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R; 2679 break; 2680 case NVPTXISD::TexUnified2DFloatFloatLevel: 2681 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R; 2682 break; 2683 case NVPTXISD::TexUnified2DFloatFloatGrad: 2684 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R; 2685 break; 2686 case NVPTXISD::TexUnified2DS32S32: 2687 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R; 2688 break; 2689 case NVPTXISD::TexUnified2DS32Float: 2690 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R; 2691 break; 2692 case NVPTXISD::TexUnified2DS32FloatLevel: 2693 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R; 2694 break; 2695 case NVPTXISD::TexUnified2DS32FloatGrad: 2696 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R; 2697 break; 2698 case NVPTXISD::TexUnified2DU32S32: 2699 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R; 2700 break; 2701 case NVPTXISD::TexUnified2DU32Float: 2702 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R; 2703 break; 2704 case NVPTXISD::TexUnified2DU32FloatLevel: 2705 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R; 2706 break; 2707 case NVPTXISD::TexUnified2DU32FloatGrad: 2708 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R; 2709 break; 2710 case 
NVPTXISD::TexUnified2DArrayFloatS32: 2711 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R; 2712 break; 2713 case NVPTXISD::TexUnified2DArrayFloatFloat: 2714 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R; 2715 break; 2716 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 2717 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R; 2718 break; 2719 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 2720 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R; 2721 break; 2722 case NVPTXISD::TexUnified2DArrayS32S32: 2723 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R; 2724 break; 2725 case NVPTXISD::TexUnified2DArrayS32Float: 2726 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R; 2727 break; 2728 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 2729 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R; 2730 break; 2731 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 2732 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R; 2733 break; 2734 case NVPTXISD::TexUnified2DArrayU32S32: 2735 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R; 2736 break; 2737 case NVPTXISD::TexUnified2DArrayU32Float: 2738 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R; 2739 break; 2740 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 2741 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R; 2742 break; 2743 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 2744 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R; 2745 break; 2746 case NVPTXISD::TexUnified3DFloatS32: 2747 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R; 2748 break; 2749 case NVPTXISD::TexUnified3DFloatFloat: 2750 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R; 2751 break; 2752 case NVPTXISD::TexUnified3DFloatFloatLevel: 2753 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R; 2754 break; 2755 case NVPTXISD::TexUnified3DFloatFloatGrad: 2756 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R; 2757 break; 2758 case NVPTXISD::TexUnified3DS32S32: 2759 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R; 2760 break; 2761 case NVPTXISD::TexUnified3DS32Float: 2762 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R; 2763 break; 2764 case NVPTXISD::TexUnified3DS32FloatLevel: 2765 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R; 2766 break; 2767 case NVPTXISD::TexUnified3DS32FloatGrad: 2768 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R; 2769 break; 2770 case NVPTXISD::TexUnified3DU32S32: 2771 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R; 2772 break; 2773 case NVPTXISD::TexUnified3DU32Float: 2774 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R; 2775 break; 2776 case NVPTXISD::TexUnified3DU32FloatLevel: 2777 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R; 2778 break; 2779 case NVPTXISD::TexUnified3DU32FloatGrad: 2780 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R; 2781 break; 2782 case NVPTXISD::TexUnifiedCubeFloatFloat: 2783 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R; 2784 break; 2785 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 2786 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R; 2787 break; 2788 case NVPTXISD::TexUnifiedCubeS32Float: 2789 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R; 2790 break; 2791 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 2792 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R; 2793 break; 2794 case NVPTXISD::TexUnifiedCubeU32Float: 2795 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R; 2796 break; 2797 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 2798 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R; 2799 break; 2800 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 2801 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R; 2802 break; 2803 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 2804 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R; 2805 break; 2806 case NVPTXISD::TexUnifiedCubeArrayS32Float: 2807 Opc = 
NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R; 2808 break; 2809 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 2810 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R; 2811 break; 2812 case NVPTXISD::TexUnifiedCubeArrayU32Float: 2813 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R; 2814 break; 2815 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 2816 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R; 2817 break; 2818 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 2819 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R; 2820 break; 2821 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 2822 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R; 2823 break; 2824 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 2825 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R; 2826 break; 2827 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 2828 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R; 2829 break; 2830 case NVPTXISD::Tld4UnifiedR2DS64Float: 2831 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R; 2832 break; 2833 case NVPTXISD::Tld4UnifiedG2DS64Float: 2834 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R; 2835 break; 2836 case NVPTXISD::Tld4UnifiedB2DS64Float: 2837 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R; 2838 break; 2839 case NVPTXISD::Tld4UnifiedA2DS64Float: 2840 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R; 2841 break; 2842 case NVPTXISD::Tld4UnifiedR2DU64Float: 2843 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R; 2844 break; 2845 case NVPTXISD::Tld4UnifiedG2DU64Float: 2846 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R; 2847 break; 2848 case NVPTXISD::Tld4UnifiedB2DU64Float: 2849 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R; 2850 break; 2851 case NVPTXISD::Tld4UnifiedA2DU64Float: 2852 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R; 2853 break; 2854 } 2855 2856 // Copy over operands 2857 SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); 2858 Ops.push_back(N->getOperand(0)); // Move chain to the back. 
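  // The ISD texture node carries its chain as operand 0, but the texture
  // machine instructions expect the chain last: drop_begin() copied every
  // operand except the chain, which was then re-appended above.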
2859 2860 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); 2861 return true; 2862 } 2863 2864 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { 2865 unsigned Opc = 0; 2866 switch (N->getOpcode()) { 2867 default: return false; 2868 case NVPTXISD::Suld1DI8Clamp: 2869 Opc = NVPTX::SULD_1D_I8_CLAMP_R; 2870 break; 2871 case NVPTXISD::Suld1DI16Clamp: 2872 Opc = NVPTX::SULD_1D_I16_CLAMP_R; 2873 break; 2874 case NVPTXISD::Suld1DI32Clamp: 2875 Opc = NVPTX::SULD_1D_I32_CLAMP_R; 2876 break; 2877 case NVPTXISD::Suld1DI64Clamp: 2878 Opc = NVPTX::SULD_1D_I64_CLAMP_R; 2879 break; 2880 case NVPTXISD::Suld1DV2I8Clamp: 2881 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R; 2882 break; 2883 case NVPTXISD::Suld1DV2I16Clamp: 2884 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R; 2885 break; 2886 case NVPTXISD::Suld1DV2I32Clamp: 2887 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R; 2888 break; 2889 case NVPTXISD::Suld1DV2I64Clamp: 2890 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R; 2891 break; 2892 case NVPTXISD::Suld1DV4I8Clamp: 2893 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R; 2894 break; 2895 case NVPTXISD::Suld1DV4I16Clamp: 2896 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R; 2897 break; 2898 case NVPTXISD::Suld1DV4I32Clamp: 2899 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R; 2900 break; 2901 case NVPTXISD::Suld1DArrayI8Clamp: 2902 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R; 2903 break; 2904 case NVPTXISD::Suld1DArrayI16Clamp: 2905 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R; 2906 break; 2907 case NVPTXISD::Suld1DArrayI32Clamp: 2908 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R; 2909 break; 2910 case NVPTXISD::Suld1DArrayI64Clamp: 2911 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R; 2912 break; 2913 case NVPTXISD::Suld1DArrayV2I8Clamp: 2914 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R; 2915 break; 2916 case NVPTXISD::Suld1DArrayV2I16Clamp: 2917 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R; 2918 break; 2919 case NVPTXISD::Suld1DArrayV2I32Clamp: 2920 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R; 2921 break; 2922 case NVPTXISD::Suld1DArrayV2I64Clamp: 2923 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R; 2924 break; 2925 case NVPTXISD::Suld1DArrayV4I8Clamp: 2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R; 2927 break; 2928 case NVPTXISD::Suld1DArrayV4I16Clamp: 2929 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R; 2930 break; 2931 case NVPTXISD::Suld1DArrayV4I32Clamp: 2932 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R; 2933 break; 2934 case NVPTXISD::Suld2DI8Clamp: 2935 Opc = NVPTX::SULD_2D_I8_CLAMP_R; 2936 break; 2937 case NVPTXISD::Suld2DI16Clamp: 2938 Opc = NVPTX::SULD_2D_I16_CLAMP_R; 2939 break; 2940 case NVPTXISD::Suld2DI32Clamp: 2941 Opc = NVPTX::SULD_2D_I32_CLAMP_R; 2942 break; 2943 case NVPTXISD::Suld2DI64Clamp: 2944 Opc = NVPTX::SULD_2D_I64_CLAMP_R; 2945 break; 2946 case NVPTXISD::Suld2DV2I8Clamp: 2947 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R; 2948 break; 2949 case NVPTXISD::Suld2DV2I16Clamp: 2950 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R; 2951 break; 2952 case NVPTXISD::Suld2DV2I32Clamp: 2953 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R; 2954 break; 2955 case NVPTXISD::Suld2DV2I64Clamp: 2956 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R; 2957 break; 2958 case NVPTXISD::Suld2DV4I8Clamp: 2959 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R; 2960 break; 2961 case NVPTXISD::Suld2DV4I16Clamp: 2962 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R; 2963 break; 2964 case NVPTXISD::Suld2DV4I32Clamp: 2965 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R; 2966 break; 2967 case NVPTXISD::Suld2DArrayI8Clamp: 2968 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R; 2969 break; 2970 case NVPTXISD::Suld2DArrayI16Clamp: 2971 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R; 2972 break; 2973 case NVPTXISD::Suld2DArrayI32Clamp: 
2974 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R; 2975 break; 2976 case NVPTXISD::Suld2DArrayI64Clamp: 2977 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R; 2978 break; 2979 case NVPTXISD::Suld2DArrayV2I8Clamp: 2980 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R; 2981 break; 2982 case NVPTXISD::Suld2DArrayV2I16Clamp: 2983 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R; 2984 break; 2985 case NVPTXISD::Suld2DArrayV2I32Clamp: 2986 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R; 2987 break; 2988 case NVPTXISD::Suld2DArrayV2I64Clamp: 2989 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R; 2990 break; 2991 case NVPTXISD::Suld2DArrayV4I8Clamp: 2992 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R; 2993 break; 2994 case NVPTXISD::Suld2DArrayV4I16Clamp: 2995 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R; 2996 break; 2997 case NVPTXISD::Suld2DArrayV4I32Clamp: 2998 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R; 2999 break; 3000 case NVPTXISD::Suld3DI8Clamp: 3001 Opc = NVPTX::SULD_3D_I8_CLAMP_R; 3002 break; 3003 case NVPTXISD::Suld3DI16Clamp: 3004 Opc = NVPTX::SULD_3D_I16_CLAMP_R; 3005 break; 3006 case NVPTXISD::Suld3DI32Clamp: 3007 Opc = NVPTX::SULD_3D_I32_CLAMP_R; 3008 break; 3009 case NVPTXISD::Suld3DI64Clamp: 3010 Opc = NVPTX::SULD_3D_I64_CLAMP_R; 3011 break; 3012 case NVPTXISD::Suld3DV2I8Clamp: 3013 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R; 3014 break; 3015 case NVPTXISD::Suld3DV2I16Clamp: 3016 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R; 3017 break; 3018 case NVPTXISD::Suld3DV2I32Clamp: 3019 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R; 3020 break; 3021 case NVPTXISD::Suld3DV2I64Clamp: 3022 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R; 3023 break; 3024 case NVPTXISD::Suld3DV4I8Clamp: 3025 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R; 3026 break; 3027 case NVPTXISD::Suld3DV4I16Clamp: 3028 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R; 3029 break; 3030 case NVPTXISD::Suld3DV4I32Clamp: 3031 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R; 3032 break; 3033 case NVPTXISD::Suld1DI8Trap: 3034 Opc = NVPTX::SULD_1D_I8_TRAP_R; 3035 break; 3036 case NVPTXISD::Suld1DI16Trap: 3037 Opc = NVPTX::SULD_1D_I16_TRAP_R; 3038 break; 3039 case NVPTXISD::Suld1DI32Trap: 3040 Opc = NVPTX::SULD_1D_I32_TRAP_R; 3041 break; 3042 case NVPTXISD::Suld1DI64Trap: 3043 Opc = NVPTX::SULD_1D_I64_TRAP_R; 3044 break; 3045 case NVPTXISD::Suld1DV2I8Trap: 3046 Opc = NVPTX::SULD_1D_V2I8_TRAP_R; 3047 break; 3048 case NVPTXISD::Suld1DV2I16Trap: 3049 Opc = NVPTX::SULD_1D_V2I16_TRAP_R; 3050 break; 3051 case NVPTXISD::Suld1DV2I32Trap: 3052 Opc = NVPTX::SULD_1D_V2I32_TRAP_R; 3053 break; 3054 case NVPTXISD::Suld1DV2I64Trap: 3055 Opc = NVPTX::SULD_1D_V2I64_TRAP_R; 3056 break; 3057 case NVPTXISD::Suld1DV4I8Trap: 3058 Opc = NVPTX::SULD_1D_V4I8_TRAP_R; 3059 break; 3060 case NVPTXISD::Suld1DV4I16Trap: 3061 Opc = NVPTX::SULD_1D_V4I16_TRAP_R; 3062 break; 3063 case NVPTXISD::Suld1DV4I32Trap: 3064 Opc = NVPTX::SULD_1D_V4I32_TRAP_R; 3065 break; 3066 case NVPTXISD::Suld1DArrayI8Trap: 3067 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R; 3068 break; 3069 case NVPTXISD::Suld1DArrayI16Trap: 3070 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R; 3071 break; 3072 case NVPTXISD::Suld1DArrayI32Trap: 3073 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R; 3074 break; 3075 case NVPTXISD::Suld1DArrayI64Trap: 3076 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R; 3077 break; 3078 case NVPTXISD::Suld1DArrayV2I8Trap: 3079 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R; 3080 break; 3081 case NVPTXISD::Suld1DArrayV2I16Trap: 3082 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R; 3083 break; 3084 case NVPTXISD::Suld1DArrayV2I32Trap: 3085 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R; 3086 break; 3087 case NVPTXISD::Suld1DArrayV2I64Trap: 3088 Opc = 
NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R; 3089 break; 3090 case NVPTXISD::Suld1DArrayV4I8Trap: 3091 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R; 3092 break; 3093 case NVPTXISD::Suld1DArrayV4I16Trap: 3094 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R; 3095 break; 3096 case NVPTXISD::Suld1DArrayV4I32Trap: 3097 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R; 3098 break; 3099 case NVPTXISD::Suld2DI8Trap: 3100 Opc = NVPTX::SULD_2D_I8_TRAP_R; 3101 break; 3102 case NVPTXISD::Suld2DI16Trap: 3103 Opc = NVPTX::SULD_2D_I16_TRAP_R; 3104 break; 3105 case NVPTXISD::Suld2DI32Trap: 3106 Opc = NVPTX::SULD_2D_I32_TRAP_R; 3107 break; 3108 case NVPTXISD::Suld2DI64Trap: 3109 Opc = NVPTX::SULD_2D_I64_TRAP_R; 3110 break; 3111 case NVPTXISD::Suld2DV2I8Trap: 3112 Opc = NVPTX::SULD_2D_V2I8_TRAP_R; 3113 break; 3114 case NVPTXISD::Suld2DV2I16Trap: 3115 Opc = NVPTX::SULD_2D_V2I16_TRAP_R; 3116 break; 3117 case NVPTXISD::Suld2DV2I32Trap: 3118 Opc = NVPTX::SULD_2D_V2I32_TRAP_R; 3119 break; 3120 case NVPTXISD::Suld2DV2I64Trap: 3121 Opc = NVPTX::SULD_2D_V2I64_TRAP_R; 3122 break; 3123 case NVPTXISD::Suld2DV4I8Trap: 3124 Opc = NVPTX::SULD_2D_V4I8_TRAP_R; 3125 break; 3126 case NVPTXISD::Suld2DV4I16Trap: 3127 Opc = NVPTX::SULD_2D_V4I16_TRAP_R; 3128 break; 3129 case NVPTXISD::Suld2DV4I32Trap: 3130 Opc = NVPTX::SULD_2D_V4I32_TRAP_R; 3131 break; 3132 case NVPTXISD::Suld2DArrayI8Trap: 3133 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R; 3134 break; 3135 case NVPTXISD::Suld2DArrayI16Trap: 3136 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R; 3137 break; 3138 case NVPTXISD::Suld2DArrayI32Trap: 3139 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R; 3140 break; 3141 case NVPTXISD::Suld2DArrayI64Trap: 3142 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R; 3143 break; 3144 case NVPTXISD::Suld2DArrayV2I8Trap: 3145 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R; 3146 break; 3147 case NVPTXISD::Suld2DArrayV2I16Trap: 3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R; 3149 break; 3150 case NVPTXISD::Suld2DArrayV2I32Trap: 3151 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R; 3152 break; 3153 case NVPTXISD::Suld2DArrayV2I64Trap: 3154 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R; 3155 break; 3156 case NVPTXISD::Suld2DArrayV4I8Trap: 3157 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R; 3158 break; 3159 case NVPTXISD::Suld2DArrayV4I16Trap: 3160 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R; 3161 break; 3162 case NVPTXISD::Suld2DArrayV4I32Trap: 3163 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R; 3164 break; 3165 case NVPTXISD::Suld3DI8Trap: 3166 Opc = NVPTX::SULD_3D_I8_TRAP_R; 3167 break; 3168 case NVPTXISD::Suld3DI16Trap: 3169 Opc = NVPTX::SULD_3D_I16_TRAP_R; 3170 break; 3171 case NVPTXISD::Suld3DI32Trap: 3172 Opc = NVPTX::SULD_3D_I32_TRAP_R; 3173 break; 3174 case NVPTXISD::Suld3DI64Trap: 3175 Opc = NVPTX::SULD_3D_I64_TRAP_R; 3176 break; 3177 case NVPTXISD::Suld3DV2I8Trap: 3178 Opc = NVPTX::SULD_3D_V2I8_TRAP_R; 3179 break; 3180 case NVPTXISD::Suld3DV2I16Trap: 3181 Opc = NVPTX::SULD_3D_V2I16_TRAP_R; 3182 break; 3183 case NVPTXISD::Suld3DV2I32Trap: 3184 Opc = NVPTX::SULD_3D_V2I32_TRAP_R; 3185 break; 3186 case NVPTXISD::Suld3DV2I64Trap: 3187 Opc = NVPTX::SULD_3D_V2I64_TRAP_R; 3188 break; 3189 case NVPTXISD::Suld3DV4I8Trap: 3190 Opc = NVPTX::SULD_3D_V4I8_TRAP_R; 3191 break; 3192 case NVPTXISD::Suld3DV4I16Trap: 3193 Opc = NVPTX::SULD_3D_V4I16_TRAP_R; 3194 break; 3195 case NVPTXISD::Suld3DV4I32Trap: 3196 Opc = NVPTX::SULD_3D_V4I32_TRAP_R; 3197 break; 3198 case NVPTXISD::Suld1DI8Zero: 3199 Opc = NVPTX::SULD_1D_I8_ZERO_R; 3200 break; 3201 case NVPTXISD::Suld1DI16Zero: 3202 Opc = NVPTX::SULD_1D_I16_ZERO_R; 3203 break; 3204 case NVPTXISD::Suld1DI32Zero: 3205 Opc = 
NVPTX::SULD_1D_I32_ZERO_R; 3206 break; 3207 case NVPTXISD::Suld1DI64Zero: 3208 Opc = NVPTX::SULD_1D_I64_ZERO_R; 3209 break; 3210 case NVPTXISD::Suld1DV2I8Zero: 3211 Opc = NVPTX::SULD_1D_V2I8_ZERO_R; 3212 break; 3213 case NVPTXISD::Suld1DV2I16Zero: 3214 Opc = NVPTX::SULD_1D_V2I16_ZERO_R; 3215 break; 3216 case NVPTXISD::Suld1DV2I32Zero: 3217 Opc = NVPTX::SULD_1D_V2I32_ZERO_R; 3218 break; 3219 case NVPTXISD::Suld1DV2I64Zero: 3220 Opc = NVPTX::SULD_1D_V2I64_ZERO_R; 3221 break; 3222 case NVPTXISD::Suld1DV4I8Zero: 3223 Opc = NVPTX::SULD_1D_V4I8_ZERO_R; 3224 break; 3225 case NVPTXISD::Suld1DV4I16Zero: 3226 Opc = NVPTX::SULD_1D_V4I16_ZERO_R; 3227 break; 3228 case NVPTXISD::Suld1DV4I32Zero: 3229 Opc = NVPTX::SULD_1D_V4I32_ZERO_R; 3230 break; 3231 case NVPTXISD::Suld1DArrayI8Zero: 3232 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R; 3233 break; 3234 case NVPTXISD::Suld1DArrayI16Zero: 3235 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R; 3236 break; 3237 case NVPTXISD::Suld1DArrayI32Zero: 3238 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R; 3239 break; 3240 case NVPTXISD::Suld1DArrayI64Zero: 3241 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R; 3242 break; 3243 case NVPTXISD::Suld1DArrayV2I8Zero: 3244 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R; 3245 break; 3246 case NVPTXISD::Suld1DArrayV2I16Zero: 3247 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R; 3248 break; 3249 case NVPTXISD::Suld1DArrayV2I32Zero: 3250 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R; 3251 break; 3252 case NVPTXISD::Suld1DArrayV2I64Zero: 3253 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R; 3254 break; 3255 case NVPTXISD::Suld1DArrayV4I8Zero: 3256 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R; 3257 break; 3258 case NVPTXISD::Suld1DArrayV4I16Zero: 3259 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R; 3260 break; 3261 case NVPTXISD::Suld1DArrayV4I32Zero: 3262 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R; 3263 break; 3264 case NVPTXISD::Suld2DI8Zero: 3265 Opc = NVPTX::SULD_2D_I8_ZERO_R; 3266 break; 3267 case NVPTXISD::Suld2DI16Zero: 3268 Opc = NVPTX::SULD_2D_I16_ZERO_R; 3269 break; 3270 case NVPTXISD::Suld2DI32Zero: 3271 Opc = NVPTX::SULD_2D_I32_ZERO_R; 3272 break; 3273 case NVPTXISD::Suld2DI64Zero: 3274 Opc = NVPTX::SULD_2D_I64_ZERO_R; 3275 break; 3276 case NVPTXISD::Suld2DV2I8Zero: 3277 Opc = NVPTX::SULD_2D_V2I8_ZERO_R; 3278 break; 3279 case NVPTXISD::Suld2DV2I16Zero: 3280 Opc = NVPTX::SULD_2D_V2I16_ZERO_R; 3281 break; 3282 case NVPTXISD::Suld2DV2I32Zero: 3283 Opc = NVPTX::SULD_2D_V2I32_ZERO_R; 3284 break; 3285 case NVPTXISD::Suld2DV2I64Zero: 3286 Opc = NVPTX::SULD_2D_V2I64_ZERO_R; 3287 break; 3288 case NVPTXISD::Suld2DV4I8Zero: 3289 Opc = NVPTX::SULD_2D_V4I8_ZERO_R; 3290 break; 3291 case NVPTXISD::Suld2DV4I16Zero: 3292 Opc = NVPTX::SULD_2D_V4I16_ZERO_R; 3293 break; 3294 case NVPTXISD::Suld2DV4I32Zero: 3295 Opc = NVPTX::SULD_2D_V4I32_ZERO_R; 3296 break; 3297 case NVPTXISD::Suld2DArrayI8Zero: 3298 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R; 3299 break; 3300 case NVPTXISD::Suld2DArrayI16Zero: 3301 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R; 3302 break; 3303 case NVPTXISD::Suld2DArrayI32Zero: 3304 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R; 3305 break; 3306 case NVPTXISD::Suld2DArrayI64Zero: 3307 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R; 3308 break; 3309 case NVPTXISD::Suld2DArrayV2I8Zero: 3310 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R; 3311 break; 3312 case NVPTXISD::Suld2DArrayV2I16Zero: 3313 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R; 3314 break; 3315 case NVPTXISD::Suld2DArrayV2I32Zero: 3316 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R; 3317 break; 3318 case NVPTXISD::Suld2DArrayV2I64Zero: 3319 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R; 3320 
    break;
  case NVPTXISD::Suld2DArrayV4I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI8Zero:
    Opc = NVPTX::SULD_3D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DI16Zero:
    Opc = NVPTX::SULD_3D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DI32Zero:
    Opc = NVPTX::SULD_3D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI64Zero:
    Opc = NVPTX::SULD_3D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I8Zero:
    Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I16Zero:
    Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I32Zero:
    Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I64Zero:
    Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I8Zero:
    Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I16Zero:
    Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I32Zero:
    Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
    break;
  }

  // Copy over operands
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}

/// tryBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction.
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands
    // We want 'and %val, %mask'
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
      std::swap(LHS, RHS);
    }

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND
      return false;
    }

    // Extract the mask bits
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits, so we would trade
      // shr+and for bfe+and, which has the same throughput.
      return false;
    }

    // How many bits are in our mask?
    uint64_t NumBits = countTrailingOnes(MaskVal);
    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair, extract the effective start bit and length
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "good" is defined here as bits
        // that exist in the original value, not shifted in.
        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
        if (NumBits > GoodBits) {
          // Do not handle the case where bits have been shifted in.
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands: we want 'and %val, %mask'.
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
      std::swap(LHS, RHS);
    }

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND.
      return false;
    }

    // Extract the mask bits.
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits, so we would trade
      // shr+and for bfe+and, which has the same throughput.
      return false;
    }

    // How many bits are in our mask?
    uint64_t NumBits = countTrailingOnes(MaskVal);
    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair; extract the effective start bit and length.
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "Good" is defined here as
        // bits that exist in the original value, not shifted in.
        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
        if (NumBits > GoodBits) {
          // Do not handle the case where bits have been shifted in. In theory
          // we could handle this, but the cost is likely higher than just
          // emitting the srl/and pair.
          return false;
        }
        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
      } else {
        // Do not handle the case where the shift amount (which can be zero if
        // no srl was found) is not constant. We could handle this case, but
        // it would require run-time logic that would be more expensive than
        // just emitting the srl/and pair.
        return false;
      }
    } else {
      // Do not handle the case where the LHS of the 'and' is not a shift.
      // While it would be trivial to handle, it would just transform
      // 'and' -> 'bfe', and 'and' has higher throughput.
      return false;
    }
  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
    if (LHS->getOpcode() == ISD::AND) {
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShiftCnst) {
        // The shift amount must be constant.
        return false;
      }

      uint64_t ShiftAmt = ShiftCnst->getZExtValue();

      SDValue AndLHS = LHS->getOperand(0);
      SDValue AndRHS = LHS->getOperand(1);

      // Canonicalize the AND to have the mask on the RHS.
      if (isa<ConstantSDNode>(AndLHS)) {
        std::swap(AndLHS, AndRHS);
      }

      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
      if (!MaskCnst) {
        // The mask must be constant.
        return false;
      }

      uint64_t MaskVal = MaskCnst->getZExtValue();
      uint64_t NumZeros;
      uint64_t NumBits;
      if (isMask_64(MaskVal)) {
        NumZeros = 0;
        // The number of bits in the result bitfield will be the number of
        // trailing ones (the AND) minus the number of bits we shift off.
        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
      } else if (isShiftedMask_64(MaskVal)) {
        NumZeros = countTrailingZeros(MaskVal);
        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
        // The number of bits in the result bitfield will be the number of
        // trailing zeros plus the number of set bits in the mask, minus the
        // number of bits we shift off.
        NumBits = NumZeros + NumOnes - ShiftAmt;
      } else {
        // This is not a mask we can handle.
        return false;
      }

      if (ShiftAmt < NumZeros) {
        // Handling this case would require extra logic that would make this
        // transformation non-profitable.
        return false;
      }

      Val = AndLHS;
      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
    } else if (LHS->getOpcode() == ISD::SHL) {
      // Here, we have a pattern like:
      //
      //   (sra (shl val, NN), MM)
      // or
      //   (srl (shl val, NN), MM)
      //
      // If MM >= NN, we can efficiently optimize this with bfe.
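      // A worked example (a sketch): on an i32 value,
      //   (srl (shl %val, 8), 16)
      // selects to "bfe.u32 %r, %val, 8, 16" -- Start = MM - NN = 8 and
      // Len = 32 - MM = 16, matching the computation below.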
      Val = LHS->getOperand(0);

      SDValue ShlRHS = LHS->getOperand(1);
      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
      if (!ShlCnst) {
        // The shift amount must be constant.
        return false;
      }
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

      SDValue ShrRHS = RHS;
      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
      if (!ShrCnst) {
        // The shift amount must be constant.
        return false;
      }
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

      // To avoid extra codegen and be profitable, we need Outer >= Inner.
      if (OuterShiftAmt < InnerShiftAmt) {
        return false;
      }

      // If the outer shift is more than the type size, we have no bitfield
      // to extract. (Since we also check that the inner shift is <= the
      // outer shift, this also implies that the inner shift is < the type
      // size.)
      if (OuterShiftAmt >= Val.getValueSizeInBits()) {
        return false;
      }

      Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
                                        MVT::i32);
      Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
                                      DL, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // An arithmetic right shift needs the signed bfe variant.
        IsSigned = true;
      }
    } else {
      // The LHS is not a pattern we can profitably fold into a bfe.
      return false;
    }
  } else {
    // Not an opcode we can profitably fold into a bfe.
    return false;
  }

  unsigned Opc;
  // For the BFE operations we form here from "and" and "srl", always use the
  // unsigned variants.
  if (Val.getValueType() == MVT::i32) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S32rii;
    } else {
      Opc = NVPTX::BFE_U32rii;
    }
  } else if (Val.getValueType() == MVT::i64) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S64rii;
    } else {
      Opc = NVPTX::BFE_U64rii;
    }
  } else {
    // We cannot handle this type.
    return false;
  }

  SDValue Ops[] = {Val, Start, Len};

  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
  return true;
}

// SelectDirectAddr - Match a direct address for DAG: either a
// TargetGlobalAddress or a TargetExternalSymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  // Return true if TGA or ES.
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
      N.getOpcode() == ISD::TargetExternalSymbol) {
    Address = N;
    return true;
  }
  if (N.getOpcode() == NVPTXISD::Wrapper) {
    Address = N.getOperand(0);
    return true;
  }
  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
  }
  return false;
}

// symbol+offset
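// Matches (add <direct address>, <constant>) so that, e.g., an access to
// "sym + 16" selects Base = sym and Offset = 16; the resulting PTX can then
// use the [sym+16] addressing form (a sketch, assuming a global symbol base).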
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue base = Addr.getOperand(0);
      if (SelectDirectAddr(base, Base)) {
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                           mvt);
        return true;
      }
    }
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

  if (Addr.getOpcode() == ISD::ADD) {
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                         mvt);
      return true;
    }
  }
  return false;
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  }
  if (!Src)
    return false;
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. Per the SelectionDAGISel convention, returns
/// false on success (the operand was selected) and true on failure.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;
  switch (ConstraintID) {
  default:
    return true;
  case InlineAsm::Constraint_m: // memory
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
      return false;
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    break;
  }
  return true;
}

/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
/// conversion from \p SrcTy to \p DestTy.
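/// For example, GetConvertOpcode(MVT::i32, MVT::i16, /*IsSigned=*/true)
/// returns NVPTX::CVT_s32_s16 (one representative case; the nested switch
/// below gives the full integer-to-integer mapping).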
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             bool IsSigned) {
  switch (SrcTy.SimpleTy) {
  default:
    llvm_unreachable("Unhandled source type");
  case MVT::i8:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
    }
  case MVT::i16:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
    }
  case MVT::i32:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
    }
  case MVT::i64:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
    }
  }
}