//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > 0);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;

  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    if (tryTextureIntrinsic(N))
      return;
    break;
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP16(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 immediates in .f16 ops, so we have to
// load them into an .f16 register first.
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
  if (N->getValueType(0) != MVT::f16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
  SDNode *LoadConstF16 =
      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

// Map an ISD::CONDCODE value to the appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}
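// SETP_F16X2 compares two f16 pairs and produces one predicate per lane. A
// rough sketch of the PTX this selects to (register names illustrative):
//   setp.gt.f16x2 %p1|%p2, %hh1, %hh2;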
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about f16x2 as it's the only real vector type we
  // need to deal with.
  if (Vector.getSimpleValueType() != MVT::v2f16)
    return false;

  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (auto U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  unsigned Op = NVPTX::SplitF16x2;
  // If the vector has been BITCAST'ed from i32, we can use the original
  // value directly and avoid a register-to-register move.
  SDValue Source = Vector;
  if (Vector->getOpcode() == ISD::BITCAST) {
    Op = NVPTX::SplitI32toF16x2;
    Source = Vector->getOperand(0);
  }
  // Merge (f16 extractelt(V, 0), f16 extractelt(V, 1))
  // into f16,f16 SplitF16x2(V)
  SDNode *ScatterOp =
      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}
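// Invariant loads from the global space can be turned into ld.global.nc
// (LDG), which reads through the non-coherent texture cache. For example
// (illustrative PTX, assuming the address is provably invariant and global):
//   ld.global.nc.f32 %f0, [%rd0];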
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and
  // ideally not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<const Value *, 8> Objs;
  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);

  return all_of(Objs, [&](const Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}
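// Address-space casts select to cvta (specific -> generic) or cvta.to
// (generic -> specific). Roughly, in PTX (register names illustrative):
//   cvta.global.u64    %rd1, %rd2; // global pointer to generic
//   cvta.to.shared.u64 %rd3, %rd4; // generic pointer to shared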
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                               : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                               : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

// Helper function template to reduce the amount of boilerplate code for
// opcode selection.
static Optional<unsigned> pickOpcodeForVT(
    MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
    unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
    unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
    return Opcode_f16;
  case MVT::v2f16:
    return Opcode_f16x2;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return None;
  }
}
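// Typical use: hand pickOpcodeForVT() the full set of per-type opcodes for one
// addressing mode, e.g. the *_avar family for direct addresses. Slots passed
// as None (such as the missing i64/f64 variants of v4 loads) make it return
// None, letting callers bail out to the default selector.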
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Signed   : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, or ISD::NON_EXTLOAD/ISD::EXTLOAD when the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD when the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;
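  // The address operand is matched against the PTX addressing modes from most
  // to least specific: direct symbol (avar), symbol+immediate (asi),
  // register+immediate (ari), and plain register (areg). The ari form, for
  // instance, selects to something like (illustrative):
  //   ld.global.u32 %r0, [%rd1+8];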
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(
        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
          NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}
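// Vector loads mirror the scalar path above but select the ld.v2/ld.v4 forms,
// e.g. (illustrative):
//   ld.global.v4.u32 {%r0, %r1, %r2, %r3}, [%rd0];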
bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Signed   : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, or ISD::NON_EXTLOAD/ISD::EXTLOAD when the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD when the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value.
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8f16 is a special case. PTX doesn't have an ld.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // load them with ld.v4.b32.
  if (EltVT == MVT::v2f16) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }
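  // E.g. a v8f16 load arrives here as LoadV4 with four v2f16 results; each
  // result is loaded as an untyped 32-bit value and reinterpreted as an f16
  // pair by its users.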
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
                          NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
                          NVPTX::LDV_f32_v4_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
                          NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
                          NVPTX::LDV_f32_v4_asi, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
            NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
            NVPTX::LDV_f32_v4_ari_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
                            NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
                            NVPTX::LDV_f32_v4_ari, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
            NVPTX::LDV_f32_v4_areg_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
            NVPTX::LDV_f32_v4_areg, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  }

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}
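// Selects ld.global.nc (LDG) and ldu loads. These reach us either as
// nvvm_ldg/nvvm_ldu intrinsic calls (INTRINSIC_W_CHAIN) or as (vector) load
// nodes that canLowerToLDG() proved safe to read through the non-coherent
// cache.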
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // Vectors of f16 are loaded/stored as multiples of v2f16 elements.
    if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = MVT::v2f16;
      NumElts /= 2;
    }
  }
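  // E.g. a v4f16 memory type is handled as two v2f16 elements here (NumElts
  // goes from 4 to 2), matching the f16x2 instruction variants.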
  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  }

  MachineMemOperand *MemRef = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node
  // needs to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.
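  //
  // A rough sketch of the selected pair for an i8 -> i32 zero-extending load
  // (illustrative register names; NVPTX keeps i8 values in 16-bit registers):
  //   ld.global.nc.u8 %rs0, [%rd0];
  //   cvt.u32.u8      %r0, %rs0;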
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  }

  MachineMemOperand *MemRef = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node
  // needs to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.

  EVT OrigType = N->getValueType(0);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT && LdNode) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
                                       EltVT.getSimpleVT(), IsSigned);

    // For each output value, apply the manual sign/zero-extension and make
    // sure all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(
                                     NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement
  // them.
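  // For example, a seq_cst atomic store is stronger than monotonic, so it is
  // rejected below rather than mapped onto a plain st instruction, which would
  // lose its ordering guarantees.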
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                           : NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, NVPTX::ST_f16_areg_64,
          NVPTX::ST_f16x2_areg_64, NVPTX::ST_f32_areg_64,
          NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType;
  if (ScalarVT.isFloatingPoint())
    ToType = ScalarVT.SimpleTy == MVT::f16
                 ? NVPTX::PTXLdStInstCode::Untyped
                 : NVPTX::PTXLdStInstCode::Float;
  else
    ToType = NVPTX::PTXLdStInstCode::Unsigned;

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8f16 is a special case. PTX doesn't have st.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // store them with st.v4.b32.
  if (EltVT == MVT::v2f16) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }

  StOps.push_back(getI32Imm(IsVolatile, DL));
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
  StOps.push_back(getI32Imm(VecType, DL));
  StOps.push_back(getI32Imm(ToType, DL));
  StOps.push_back(getI32Imm(ToTypeWidth, DL));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      break;
    case NVPTXISD::StoreV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
                          NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
                          NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
                          NVPTX::STV_f32_v4_avar, None);
      break;
    }
    StOps.push_back(Addr);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      break;
    case NVPTXISD::StoreV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
                          NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
                          NVPTX::STV_f32_v4_asi, None);
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (PointerSize == 64
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
            NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
            NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
            NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
            NVPTX::STV_f64_v2_ari_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
            NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
            NVPTX::STV_f32_v4_ari_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
                                 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
        break;
      case NVPTXISD::StoreV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
                            NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
                            NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
                            NVPTX::STV_f32_v4_ari, None);
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
            NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
            NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
            NVPTX::STV_f32_v4_areg_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
                            NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
                            NVPTX::STV_f64_v2_areg);
        break;
      case NVPTXISD::StoreV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
                            None, NVPTX::STV_f16_v4_areg,
                            NVPTX::STV_f16x2_v4_areg, NVPTX::STV_f32_v4_areg,
                            None);
        break;
      }
    }
    StOps.push_back(N2);
  }

  if (!Opcode)
    return false;

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});

  ReplaceNode(N, ST);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
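  // Operand 3 carries the incoming glue, which keeps this load ordered within
  // the surrounding call sequence; it is re-attached to the selected machine
  // node below.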
  SDValue Flag = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return false;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  Optional<unsigned> Opcode;

  switch (VecSize) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
    break;
  case 2:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
                        NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
                        NVPTX::LoadParamMemV2F64);
    break;
  case 4:
    Opcode = pickOpcodeForVT(
        MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
        NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
        NVPTX::LoadParamMemV4F32, None);
    break;
  }
  if (!Opcode)
    return false;

  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
  case NVPTXISD::StoreRetval:
    NumElts = 1;
    break;
  case NVPTXISD::StoreRetvalV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreRetvalV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 6> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 2));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
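  // e.g. an i1 return value reaches this point with an i8 memory type and is
  // therefore written with StoreRetvalI8 below.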
  Optional<unsigned> Opcode;
  switch (NumElts) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
                             NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
    break;
  case 2:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
                             NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
    break;
  case 4:
    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
                             NVPTX::StoreRetvalV4I32, None,
                             NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
                             NVPTX::StoreRetvalV4F32, None);
    break;
  }
  if (!Opcode)
    return false;

  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Flag = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
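  // e.g. NVPTXISD::StoreParamU32 below widens its i16 operand with a
  // CVT_u32_u16 machine node before emitting the plain StoreParamI32 opcode.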
  Optional<unsigned> Opcode;
  switch (N->getOpcode()) {
  default:
    switch (NumElts) {
    default:
      return false;
    case 1:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamI8, NVPTX::StoreParamI16,
                               NVPTX::StoreParamI32, NVPTX::StoreParamI64,
                               NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
                               NVPTX::StoreParamF32, NVPTX::StoreParamF64);
      break;
    case 2:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
                               NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
                               NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
      break;
    case 4:
      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                               NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
                               NVPTX::StoreParamV4I32, None,
                               NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
                               NVPTX::StoreParamV4F32, None);
      break;
    }
    if (!Opcode)
      return false;
    break;
  // Special case: if we have a sign-extend/zero-extend node, insert the
  // conversion instruction first, and use that as the value operand to
  // the selected StoreParam node.
  case NVPTXISD::StoreParamU32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  case NVPTXISD::StoreParamS32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}

bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
  unsigned Opc = 0;

  switch (N->getOpcode()) {
  default: return false;
  case NVPTXISD::Tex1DFloatS32:
    Opc = NVPTX::TEX_1D_F32_S32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloat:
    Opc = NVPTX::TEX_1D_F32_F32_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatLevel:
    Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DFloatFloatGrad:
    Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DS32S32:
    Opc = NVPTX::TEX_1D_S32_S32_RR;
    break;
  case NVPTXISD::Tex1DS32Float:
    Opc = NVPTX::TEX_1D_S32_F32_RR;
    break;
  case NVPTXISD::Tex1DS32FloatLevel:
    Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DS32FloatGrad:
    Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DU32S32:
    Opc = NVPTX::TEX_1D_U32_S32_RR;
    break;
  case NVPTXISD::Tex1DU32Float:
    Opc = NVPTX::TEX_1D_U32_F32_RR;
    break;
  case NVPTXISD::Tex1DU32FloatLevel:
    Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DU32FloatGrad:
    Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatS32:
    Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloat:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayS32S32:
    Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayS32Float:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex1DArrayU32S32:
    Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
    break;
  case NVPTXISD::Tex1DArrayU32Float:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DFloatS32:
    Opc = NVPTX::TEX_2D_F32_S32_RR;
    break;
  case NVPTXISD::Tex2DFloatFloat:
    Opc = NVPTX::TEX_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tex2DFloatFloatLevel:
    Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DFloatFloatGrad:
    Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DS32S32:
    Opc = NVPTX::TEX_2D_S32_S32_RR;
    break;
  case NVPTXISD::Tex2DS32Float:
    Opc = NVPTX::TEX_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tex2DS32FloatLevel:
    Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DS32FloatGrad:
    Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DU32S32:
    Opc = NVPTX::TEX_2D_U32_S32_RR;
    break;
  case NVPTXISD::Tex2DU32Float:
    Opc = NVPTX::TEX_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tex2DU32FloatLevel:
    Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DU32FloatGrad:
    Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatS32:
    Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloat:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayS32S32:
    Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayS32Float:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex2DArrayU32S32:
    Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
    break;
  case NVPTXISD::Tex2DArrayU32Float:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DFloatS32:
    Opc = NVPTX::TEX_3D_F32_S32_RR;
    break;
  case NVPTXISD::Tex3DFloatFloat:
    Opc = NVPTX::TEX_3D_F32_F32_RR;
    break;
  case NVPTXISD::Tex3DFloatFloatLevel:
    Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DFloatFloatGrad:
    Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DS32S32:
    Opc = NVPTX::TEX_3D_S32_S32_RR;
    break;
  case NVPTXISD::Tex3DS32Float:
    Opc = NVPTX::TEX_3D_S32_F32_RR;
    break;
  case NVPTXISD::Tex3DS32FloatLevel:
    Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DS32FloatGrad:
    Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
    break;
  case NVPTXISD::Tex3DU32S32:
    Opc = NVPTX::TEX_3D_U32_S32_RR;
    break;
  case NVPTXISD::Tex3DU32Float:
    Opc = NVPTX::TEX_3D_U32_F32_RR;
    break;
  case NVPTXISD::Tex3DU32FloatLevel:
    Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tex3DU32FloatGrad:
    Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
    break;
  case NVPTXISD::TexCubeFloatFloat:
    Opc = NVPTX::TEX_CUBE_F32_F32_RR;
    break;
  case NVPTXISD::TexCubeFloatFloatLevel:
    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeS32Float:
    Opc = NVPTX::TEX_CUBE_S32_F32_RR;
    break;
  case NVPTXISD::TexCubeS32FloatLevel:
    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeU32Float:
    Opc = NVPTX::TEX_CUBE_U32_F32_RR;
    break;
  case NVPTXISD::TexCubeU32FloatLevel:
    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayFloatFloat:
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayS32Float:
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
    break;
  case NVPTXISD::TexCubeArrayU32Float:
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
    break;
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
    break;
  case NVPTXISD::Tld4R2DFloatFloat:
    Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DFloatFloat:
    Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DFloatFloat:
    Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DFloatFloat:
    Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
    break;
  case NVPTXISD::Tld4R2DS64Float:
    Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DS64Float:
    Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DS64Float:
    Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DS64Float:
    Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
    break;
  case NVPTXISD::Tld4R2DU64Float:
    Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4G2DU64Float:
    Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4B2DU64Float:
    Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
    break;
  case NVPTXISD::Tld4A2DU64Float:
    Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
    break;
  case NVPTXISD::TexUnified1DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DS32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified1DS32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified1DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DU32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified1DU32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified1DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatS32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32S32:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DS32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified2DS32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified2DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DU32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified2DU32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified2DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatS32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32S32:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DFloatS32:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DS32S32:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
    break;
  case NVPTXISD::TexUnified3DS32Float:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
    break;
  case NVPTXISD::TexUnified3DS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DS32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnified3DU32S32:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
    break;
  case NVPTXISD::TexUnified3DU32Float:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
    break;
  case NVPTXISD::TexUnified3DU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnified3DU32FloatGrad:
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
    break;
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeS32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeU32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
    break;
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
    break;
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
    break;
  }

  // Copy over operands
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.
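  // Ops now holds the intrinsic's non-chain operands (texture/sampler handles
  // and coordinates) in their original order, with the chain moved to the end
  // as the selected machine instruction expects.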

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}

bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
  unsigned Opc = 0;
  switch (N->getOpcode()) {
  default: return false;
  case NVPTXISD::Suld1DI8Clamp:
    Opc = NVPTX::SULD_1D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI16Clamp:
    Opc = NVPTX::SULD_1D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI32Clamp:
    Opc = NVPTX::SULD_1D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI64Clamp:
    Opc = NVPTX::SULD_1D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I8Clamp:
    Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I16Clamp:
    Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I32Clamp:
    Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I64Clamp:
    Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I8Clamp:
    Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I16Clamp:
    Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I32Clamp:
    Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI64Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI8Clamp:
    Opc = NVPTX::SULD_2D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI16Clamp:
    Opc = NVPTX::SULD_2D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI32Clamp:
    Opc = NVPTX::SULD_2D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI64Clamp:
    Opc = NVPTX::SULD_2D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I8Clamp:
    Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I16Clamp:
    Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I32Clamp:
    Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I64Clamp:
    Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I8Clamp:
    Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I16Clamp:
    Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I32Clamp:
    Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI64Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI8Clamp:
    Opc = NVPTX::SULD_3D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI16Clamp:
    Opc = NVPTX::SULD_3D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI32Clamp:
    Opc = NVPTX::SULD_3D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI64Clamp:
    Opc = NVPTX::SULD_3D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I8Clamp:
    Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I16Clamp:
    Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I32Clamp:
    Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I64Clamp:
    Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I8Clamp:
    Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I16Clamp:
    Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I32Clamp:
    Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI8Trap:
    Opc = NVPTX::SULD_1D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DI16Trap:
    Opc = NVPTX::SULD_1D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DI32Trap:
    Opc = NVPTX::SULD_1D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DI64Trap:
    Opc = NVPTX::SULD_1D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I8Trap:
    Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I16Trap:
    Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I32Trap:
    Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I64Trap:
    Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I8Trap:
    Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I16Trap:
    Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I32Trap:
    Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI64Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DI8Trap:
    Opc = NVPTX::SULD_2D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DI16Trap:
    Opc = NVPTX::SULD_2D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DI32Trap:
    Opc = NVPTX::SULD_2D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DI64Trap:
    Opc = NVPTX::SULD_2D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I8Trap:
    Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I16Trap:
    Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I32Trap:
    Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I64Trap:
    Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I8Trap:
    Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I16Trap:
    Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I32Trap:
    Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI64Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DI8Trap:
    Opc = NVPTX::SULD_3D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DI16Trap:
    Opc = NVPTX::SULD_3D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DI32Trap:
    Opc = NVPTX::SULD_3D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DI64Trap:
    Opc = NVPTX::SULD_3D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I8Trap:
    Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I16Trap:
    Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I32Trap:
    Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I64Trap:
    Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I8Trap:
    Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I16Trap:
    Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I32Trap:
    Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DI8Zero:
    Opc = NVPTX::SULD_1D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DI16Zero:
    Opc = NVPTX::SULD_1D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DI32Zero:
    Opc = NVPTX::SULD_1D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DI64Zero:
    Opc = NVPTX::SULD_1D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I8Zero:
    Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I16Zero:
    Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I32Zero:
    Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I64Zero:
    Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I8Zero:
    Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I16Zero:
    Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I32Zero:
    Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI8Zero:
    Opc = NVPTX::SULD_2D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DI16Zero:
    Opc = NVPTX::SULD_2D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DI32Zero:
    Opc = NVPTX::SULD_2D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI64Zero:
    Opc = NVPTX::SULD_2D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I8Zero:
    Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I16Zero:
    Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I32Zero:
    Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I64Zero:
    Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I8Zero:
    Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I16Zero:
    Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I32Zero:
    Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI8Zero:
    Opc = NVPTX::SULD_3D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DI16Zero:
    Opc = NVPTX::SULD_3D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DI32Zero:
    Opc = NVPTX::SULD_3D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI64Zero:
    Opc = NVPTX::SULD_3D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I8Zero:
    Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I16Zero:
    Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I32Zero:
    Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I64Zero:
    Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I8Zero:
    Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I16Zero:
    Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I32Zero:
    Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
    break;
  }

  // Copy over operands
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}

/// tryBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands
    // We want 'and %val, %mask'
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
      std::swap(LHS, RHS);
    }

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND
      return false;
    }

    // Extract the mask bits
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits so we would trade
      // shr+and for bfe+and, which has the same throughput
      return false;
    }

    // How many bits are in our mask?
    uint64_t NumBits = countTrailingOnes(MaskVal);
    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair, extract the effective start bit and length
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "good" is defined here as bits
        // that exist in the original value, not shifted in.
        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
        if (NumBits > GoodBits) {
    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair, extract the effective start bit and length
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "good" is defined here as
        // bits that exist in the original value, not shifted in.
        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
        if (NumBits > GoodBits) {
          // Do not handle the case where bits have been shifted in. In theory
          // we could handle this, but the cost is likely higher than just
          // emitting the srl/and pair.
          return false;
        }
        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
      } else {
        // Do not handle the case where the shift amount (which can be zero if
        // no srl was found) is not constant. We could handle this case, but
        // it would require run-time logic that would be more expensive than
        // just emitting the srl/and pair.
        return false;
      }
    } else {
      // Do not handle the case where the LHS of the and is not a shift. While
      // it would be trivial to handle this case, it would just transform
      // 'and' -> 'bfe', but 'and' has higher throughput.
      return false;
    }
  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
    if (LHS->getOpcode() == ISD::AND) {
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShiftCnst) {
        // Shift amount must be constant
        return false;
      }

      uint64_t ShiftAmt = ShiftCnst->getZExtValue();

      SDValue AndLHS = LHS->getOperand(0);
      SDValue AndRHS = LHS->getOperand(1);

      // Canonicalize the AND to have the mask on the RHS
      if (isa<ConstantSDNode>(AndLHS)) {
        std::swap(AndLHS, AndRHS);
      }

      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
      if (!MaskCnst) {
        // Mask must be constant
        return false;
      }

      uint64_t MaskVal = MaskCnst->getZExtValue();
      uint64_t NumZeros;
      uint64_t NumBits;
      if (isMask_64(MaskVal)) {
        NumZeros = 0;
        // The number of bits in the result bitfield will be the number of
        // trailing ones (the AND) minus the number of bits we shift off
        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
      } else if (isShiftedMask_64(MaskVal)) {
        NumZeros = countTrailingZeros(MaskVal);
        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
        // The number of bits in the result bitfield will be the number of
        // trailing zeros plus the number of set bits in the mask minus the
        // number of bits we shift off
        NumBits = NumZeros + NumOnes - ShiftAmt;
      } else {
        // This is not a mask we can handle
        return false;
      }

      if (ShiftAmt < NumZeros) {
        // Handling this case would require extra logic that would make this
        // transformation non-profitable
        return false;
      }

      Val = AndLHS;
      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
    } else if (LHS->getOpcode() == ISD::SHL) {
      // Here, we have a pattern like:
      //
      // (sra (shl val, NN), MM)
      // or
      // (srl (shl val, NN), MM)
      //
      // If MM >= NN, we can efficiently optimize this with bfe
      Val = LHS->getOperand(0);

      SDValue ShlRHS = LHS->getOperand(1);
      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
      if (!ShlCnst) {
        // Shift amount must be constant
        return false;
      }
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

      SDValue ShrRHS = RHS;
      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
      if (!ShrCnst) {
        // Shift amount must be constant
        return false;
      }
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

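      // Worked example (with illustrative value names): for i32,
      //   (srl (shl %x, 8), 16)
      // has InnerShiftAmt = 8 and OuterShiftAmt = 16, so the checks below
      // pass and we select NVPTX::BFE_U32rii with Start = 16 - 8 = 8 and
      // Len = 32 - 16 = 16. The sra form sets IsSigned and selects the
      // signed variant (NVPTX::BFE_S32rii) instead.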
      // To avoid extra codegen and be profitable, we need Outer >= Inner
      if (OuterShiftAmt < InnerShiftAmt) {
        return false;
      }

      // If the outer shift is more than the type size, we have no bitfield
      // to extract (since we also check that the inner shift is <= the outer
      // shift, this also implies that the inner shift is < the type size).
      if (OuterShiftAmt >= Val.getValueSizeInBits()) {
        return false;
      }

      Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
                                        MVT::i32);
      Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
                                      DL, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // If we have an arithmetic right shift, we need to use the signed bfe
        // variant
        IsSigned = true;
      }
    } else {
      // No can do...
      return false;
    }
  } else {
    // No can do...
    return false;
  }

  unsigned Opc;
  // For the BFE operations we form here from "and" and "srl", always use the
  // unsigned variants.
  if (Val.getValueType() == MVT::i32) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S32rii;
    } else {
      Opc = NVPTX::BFE_U32rii;
    }
  } else if (Val.getValueType() == MVT::i64) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S64rii;
    } else {
      Opc = NVPTX::BFE_U64rii;
    }
  } else {
    // We cannot handle this type
    return false;
  }

  SDValue Ops[] = {
    Val, Start, Len
  };

  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
  return true;
}

// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  // Return true if TGA or ES.
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
      N.getOpcode() == ISD::TargetExternalSymbol) {
    Address = N;
    return true;
  }
  if (N.getOpcode() == NVPTXISD::Wrapper) {
    Address = N.getOperand(0);
    return true;
  }
  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
  }
  return false;
}

// symbol+offset
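// Worked example (with illustrative names): an address of the form
//   (add (NVPTXISD::Wrapper (TargetGlobalAddress @g)), 16)
// is matched below as Base = @g with Offset = 16, i.e. a
// symbol-plus-immediate operand.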
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue base = Addr.getOperand(0);
      if (SelectDirectAddr(base, Base)) {
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                           mvt);
        return true;
      }
    }
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

  if (Addr.getOpcode() == ISD::ADD) {
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
                                         mvt);
      return true;
    }
  }
  return false;
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  }
  if (!Src)
    return false;
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;
  switch (ConstraintID) {
  default:
    return true;
  case InlineAsm::Constraint_m: // memory
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
      return false;
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    break;
  }
  return true;
}

/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
/// conversion from \p SrcTy to \p DestTy.
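/// For example, GetConvertOpcode(MVT::i32, MVT::i16, /*IsSigned=*/true)
/// returns NVPTX::CVT_s32_s16.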
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             bool IsSigned) {
  switch (SrcTy.SimpleTy) {
  default:
    llvm_unreachable("Unhandled source type");
  case MVT::i8:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
    }
  case MVT::i16:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
    }
  case MVT::i32:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
    }
  case MVT::i64:
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
    }
  }
}