1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the NVPTX target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "NVPTXISelDAGToDAG.h" 14 #include "MCTargetDesc/NVPTXBaseInfo.h" 15 #include "NVPTXUtilities.h" 16 #include "llvm/Analysis/ValueTracking.h" 17 #include "llvm/IR/GlobalValue.h" 18 #include "llvm/IR/Instructions.h" 19 #include "llvm/IR/IntrinsicsNVPTX.h" 20 #include "llvm/Support/AtomicOrdering.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Target/TargetIntrinsicInfo.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "nvptx-isel" 30 31 /// createNVPTXISelDag - This pass converts a legalized DAG into a 32 /// NVPTX-specific DAG, ready for instruction scheduling. 33 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, 34 llvm::CodeGenOpt::Level OptLevel) { 35 return new NVPTXDAGToDAGISel(TM, OptLevel); 36 } 37 38 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, 39 CodeGenOpt::Level OptLevel) 40 : SelectionDAGISel(tm, OptLevel), TM(tm) { 41 doMulWide = (OptLevel > 0); 42 } 43 44 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 45 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); 46 return SelectionDAGISel::runOnMachineFunction(MF); 47 } 48 49 int NVPTXDAGToDAGISel::getDivF32Level() const { 50 return Subtarget->getTargetLowering()->getDivF32Level(); 51 } 52 53 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { 54 return Subtarget->getTargetLowering()->usePrecSqrtF32(); 55 } 56 57 bool NVPTXDAGToDAGISel::useF32FTZ() const { 58 return Subtarget->getTargetLowering()->useF32FTZ(*MF); 59 } 60 61 bool NVPTXDAGToDAGISel::allowFMA() const { 62 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); 63 return TL->allowFMA(*MF, OptLevel); 64 } 65 66 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const { 67 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); 68 return TL->allowUnsafeFPMath(*MF); 69 } 70 71 bool NVPTXDAGToDAGISel::useShortPointers() const { 72 return TM.useShortPointers(); 73 } 74 75 /// Select - Select instructions not customized! Used for 76 /// expanded, promoted and normal instructions. 77 void NVPTXDAGToDAGISel::Select(SDNode *N) { 78 79 if (N->isMachineOpcode()) { 80 N->setNodeId(-1); 81 return; // Already selected. 82 } 83 84 switch (N->getOpcode()) { 85 case ISD::LOAD: 86 case ISD::ATOMIC_LOAD: 87 if (tryLoad(N)) 88 return; 89 break; 90 case ISD::STORE: 91 case ISD::ATOMIC_STORE: 92 if (tryStore(N)) 93 return; 94 break; 95 case ISD::EXTRACT_VECTOR_ELT: 96 if (tryEXTRACT_VECTOR_ELEMENT(N)) 97 return; 98 break; 99 case NVPTXISD::SETP_F16X2: 100 SelectSETP_F16X2(N); 101 return; 102 103 case NVPTXISD::LoadV2: 104 case NVPTXISD::LoadV4: 105 if (tryLoadVector(N)) 106 return; 107 break; 108 case NVPTXISD::LDGV2: 109 case NVPTXISD::LDGV4: 110 case NVPTXISD::LDUV2: 111 case NVPTXISD::LDUV4: 112 if (tryLDGLDU(N)) 113 return; 114 break; 115 case NVPTXISD::StoreV2: 116 case NVPTXISD::StoreV4: 117 if (tryStoreVector(N)) 118 return; 119 break; 120 case NVPTXISD::LoadParam: 121 case NVPTXISD::LoadParamV2: 122 case NVPTXISD::LoadParamV4: 123 if (tryLoadParam(N)) 124 return; 125 break; 126 case NVPTXISD::StoreRetval: 127 case NVPTXISD::StoreRetvalV2: 128 case NVPTXISD::StoreRetvalV4: 129 if (tryStoreRetval(N)) 130 return; 131 break; 132 case NVPTXISD::StoreParam: 133 case NVPTXISD::StoreParamV2: 134 case NVPTXISD::StoreParamV4: 135 case NVPTXISD::StoreParamS32: 136 case NVPTXISD::StoreParamU32: 137 if (tryStoreParam(N)) 138 return; 139 break; 140 case ISD::INTRINSIC_WO_CHAIN: 141 if (tryIntrinsicNoChain(N)) 142 return; 143 break; 144 case ISD::INTRINSIC_W_CHAIN: 145 if (tryIntrinsicChain(N)) 146 return; 147 break; 148 case NVPTXISD::Tex1DFloatS32: 149 case NVPTXISD::Tex1DFloatFloat: 150 case NVPTXISD::Tex1DFloatFloatLevel: 151 case NVPTXISD::Tex1DFloatFloatGrad: 152 case NVPTXISD::Tex1DS32S32: 153 case NVPTXISD::Tex1DS32Float: 154 case NVPTXISD::Tex1DS32FloatLevel: 155 case NVPTXISD::Tex1DS32FloatGrad: 156 case NVPTXISD::Tex1DU32S32: 157 case NVPTXISD::Tex1DU32Float: 158 case NVPTXISD::Tex1DU32FloatLevel: 159 case NVPTXISD::Tex1DU32FloatGrad: 160 case NVPTXISD::Tex1DArrayFloatS32: 161 case NVPTXISD::Tex1DArrayFloatFloat: 162 case NVPTXISD::Tex1DArrayFloatFloatLevel: 163 case NVPTXISD::Tex1DArrayFloatFloatGrad: 164 case NVPTXISD::Tex1DArrayS32S32: 165 case NVPTXISD::Tex1DArrayS32Float: 166 case NVPTXISD::Tex1DArrayS32FloatLevel: 167 case NVPTXISD::Tex1DArrayS32FloatGrad: 168 case NVPTXISD::Tex1DArrayU32S32: 169 case NVPTXISD::Tex1DArrayU32Float: 170 case NVPTXISD::Tex1DArrayU32FloatLevel: 171 case NVPTXISD::Tex1DArrayU32FloatGrad: 172 case NVPTXISD::Tex2DFloatS32: 173 case NVPTXISD::Tex2DFloatFloat: 174 case NVPTXISD::Tex2DFloatFloatLevel: 175 case NVPTXISD::Tex2DFloatFloatGrad: 176 case NVPTXISD::Tex2DS32S32: 177 case NVPTXISD::Tex2DS32Float: 178 case NVPTXISD::Tex2DS32FloatLevel: 179 case NVPTXISD::Tex2DS32FloatGrad: 180 case NVPTXISD::Tex2DU32S32: 181 case NVPTXISD::Tex2DU32Float: 182 case NVPTXISD::Tex2DU32FloatLevel: 183 case NVPTXISD::Tex2DU32FloatGrad: 184 case NVPTXISD::Tex2DArrayFloatS32: 185 case NVPTXISD::Tex2DArrayFloatFloat: 186 case NVPTXISD::Tex2DArrayFloatFloatLevel: 187 case NVPTXISD::Tex2DArrayFloatFloatGrad: 188 case NVPTXISD::Tex2DArrayS32S32: 189 case NVPTXISD::Tex2DArrayS32Float: 190 case NVPTXISD::Tex2DArrayS32FloatLevel: 191 case NVPTXISD::Tex2DArrayS32FloatGrad: 192 case NVPTXISD::Tex2DArrayU32S32: 193 case NVPTXISD::Tex2DArrayU32Float: 194 case NVPTXISD::Tex2DArrayU32FloatLevel: 195 case NVPTXISD::Tex2DArrayU32FloatGrad: 196 case NVPTXISD::Tex3DFloatS32: 197 case NVPTXISD::Tex3DFloatFloat: 198 case NVPTXISD::Tex3DFloatFloatLevel: 199 case NVPTXISD::Tex3DFloatFloatGrad: 200 case NVPTXISD::Tex3DS32S32: 201 case NVPTXISD::Tex3DS32Float: 202 case NVPTXISD::Tex3DS32FloatLevel: 203 case NVPTXISD::Tex3DS32FloatGrad: 204 case NVPTXISD::Tex3DU32S32: 205 case NVPTXISD::Tex3DU32Float: 206 case NVPTXISD::Tex3DU32FloatLevel: 207 case NVPTXISD::Tex3DU32FloatGrad: 208 case NVPTXISD::TexCubeFloatFloat: 209 case NVPTXISD::TexCubeFloatFloatLevel: 210 case NVPTXISD::TexCubeS32Float: 211 case NVPTXISD::TexCubeS32FloatLevel: 212 case NVPTXISD::TexCubeU32Float: 213 case NVPTXISD::TexCubeU32FloatLevel: 214 case NVPTXISD::TexCubeArrayFloatFloat: 215 case NVPTXISD::TexCubeArrayFloatFloatLevel: 216 case NVPTXISD::TexCubeArrayS32Float: 217 case NVPTXISD::TexCubeArrayS32FloatLevel: 218 case NVPTXISD::TexCubeArrayU32Float: 219 case NVPTXISD::TexCubeArrayU32FloatLevel: 220 case NVPTXISD::Tld4R2DFloatFloat: 221 case NVPTXISD::Tld4G2DFloatFloat: 222 case NVPTXISD::Tld4B2DFloatFloat: 223 case NVPTXISD::Tld4A2DFloatFloat: 224 case NVPTXISD::Tld4R2DS64Float: 225 case NVPTXISD::Tld4G2DS64Float: 226 case NVPTXISD::Tld4B2DS64Float: 227 case NVPTXISD::Tld4A2DS64Float: 228 case NVPTXISD::Tld4R2DU64Float: 229 case NVPTXISD::Tld4G2DU64Float: 230 case NVPTXISD::Tld4B2DU64Float: 231 case NVPTXISD::Tld4A2DU64Float: 232 case NVPTXISD::TexUnified1DFloatS32: 233 case NVPTXISD::TexUnified1DFloatFloat: 234 case NVPTXISD::TexUnified1DFloatFloatLevel: 235 case NVPTXISD::TexUnified1DFloatFloatGrad: 236 case NVPTXISD::TexUnified1DS32S32: 237 case NVPTXISD::TexUnified1DS32Float: 238 case NVPTXISD::TexUnified1DS32FloatLevel: 239 case NVPTXISD::TexUnified1DS32FloatGrad: 240 case NVPTXISD::TexUnified1DU32S32: 241 case NVPTXISD::TexUnified1DU32Float: 242 case NVPTXISD::TexUnified1DU32FloatLevel: 243 case NVPTXISD::TexUnified1DU32FloatGrad: 244 case NVPTXISD::TexUnified1DArrayFloatS32: 245 case NVPTXISD::TexUnified1DArrayFloatFloat: 246 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 247 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 248 case NVPTXISD::TexUnified1DArrayS32S32: 249 case NVPTXISD::TexUnified1DArrayS32Float: 250 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 251 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 252 case NVPTXISD::TexUnified1DArrayU32S32: 253 case NVPTXISD::TexUnified1DArrayU32Float: 254 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 255 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 256 case NVPTXISD::TexUnified2DFloatS32: 257 case NVPTXISD::TexUnified2DFloatFloat: 258 case NVPTXISD::TexUnified2DFloatFloatLevel: 259 case NVPTXISD::TexUnified2DFloatFloatGrad: 260 case NVPTXISD::TexUnified2DS32S32: 261 case NVPTXISD::TexUnified2DS32Float: 262 case NVPTXISD::TexUnified2DS32FloatLevel: 263 case NVPTXISD::TexUnified2DS32FloatGrad: 264 case NVPTXISD::TexUnified2DU32S32: 265 case NVPTXISD::TexUnified2DU32Float: 266 case NVPTXISD::TexUnified2DU32FloatLevel: 267 case NVPTXISD::TexUnified2DU32FloatGrad: 268 case NVPTXISD::TexUnified2DArrayFloatS32: 269 case NVPTXISD::TexUnified2DArrayFloatFloat: 270 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 271 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 272 case NVPTXISD::TexUnified2DArrayS32S32: 273 case NVPTXISD::TexUnified2DArrayS32Float: 274 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 275 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 276 case NVPTXISD::TexUnified2DArrayU32S32: 277 case NVPTXISD::TexUnified2DArrayU32Float: 278 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 279 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 280 case NVPTXISD::TexUnified3DFloatS32: 281 case NVPTXISD::TexUnified3DFloatFloat: 282 case NVPTXISD::TexUnified3DFloatFloatLevel: 283 case NVPTXISD::TexUnified3DFloatFloatGrad: 284 case NVPTXISD::TexUnified3DS32S32: 285 case NVPTXISD::TexUnified3DS32Float: 286 case NVPTXISD::TexUnified3DS32FloatLevel: 287 case NVPTXISD::TexUnified3DS32FloatGrad: 288 case NVPTXISD::TexUnified3DU32S32: 289 case NVPTXISD::TexUnified3DU32Float: 290 case NVPTXISD::TexUnified3DU32FloatLevel: 291 case NVPTXISD::TexUnified3DU32FloatGrad: 292 case NVPTXISD::TexUnifiedCubeFloatFloat: 293 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 294 case NVPTXISD::TexUnifiedCubeS32Float: 295 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 296 case NVPTXISD::TexUnifiedCubeU32Float: 297 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 298 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 299 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 300 case NVPTXISD::TexUnifiedCubeArrayS32Float: 301 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 302 case NVPTXISD::TexUnifiedCubeArrayU32Float: 303 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 304 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 305 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 306 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 307 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 308 case NVPTXISD::Tld4UnifiedR2DS64Float: 309 case NVPTXISD::Tld4UnifiedG2DS64Float: 310 case NVPTXISD::Tld4UnifiedB2DS64Float: 311 case NVPTXISD::Tld4UnifiedA2DS64Float: 312 case NVPTXISD::Tld4UnifiedR2DU64Float: 313 case NVPTXISD::Tld4UnifiedG2DU64Float: 314 case NVPTXISD::Tld4UnifiedB2DU64Float: 315 case NVPTXISD::Tld4UnifiedA2DU64Float: 316 if (tryTextureIntrinsic(N)) 317 return; 318 break; 319 case NVPTXISD::Suld1DI8Clamp: 320 case NVPTXISD::Suld1DI16Clamp: 321 case NVPTXISD::Suld1DI32Clamp: 322 case NVPTXISD::Suld1DI64Clamp: 323 case NVPTXISD::Suld1DV2I8Clamp: 324 case NVPTXISD::Suld1DV2I16Clamp: 325 case NVPTXISD::Suld1DV2I32Clamp: 326 case NVPTXISD::Suld1DV2I64Clamp: 327 case NVPTXISD::Suld1DV4I8Clamp: 328 case NVPTXISD::Suld1DV4I16Clamp: 329 case NVPTXISD::Suld1DV4I32Clamp: 330 case NVPTXISD::Suld1DArrayI8Clamp: 331 case NVPTXISD::Suld1DArrayI16Clamp: 332 case NVPTXISD::Suld1DArrayI32Clamp: 333 case NVPTXISD::Suld1DArrayI64Clamp: 334 case NVPTXISD::Suld1DArrayV2I8Clamp: 335 case NVPTXISD::Suld1DArrayV2I16Clamp: 336 case NVPTXISD::Suld1DArrayV2I32Clamp: 337 case NVPTXISD::Suld1DArrayV2I64Clamp: 338 case NVPTXISD::Suld1DArrayV4I8Clamp: 339 case NVPTXISD::Suld1DArrayV4I16Clamp: 340 case NVPTXISD::Suld1DArrayV4I32Clamp: 341 case NVPTXISD::Suld2DI8Clamp: 342 case NVPTXISD::Suld2DI16Clamp: 343 case NVPTXISD::Suld2DI32Clamp: 344 case NVPTXISD::Suld2DI64Clamp: 345 case NVPTXISD::Suld2DV2I8Clamp: 346 case NVPTXISD::Suld2DV2I16Clamp: 347 case NVPTXISD::Suld2DV2I32Clamp: 348 case NVPTXISD::Suld2DV2I64Clamp: 349 case NVPTXISD::Suld2DV4I8Clamp: 350 case NVPTXISD::Suld2DV4I16Clamp: 351 case NVPTXISD::Suld2DV4I32Clamp: 352 case NVPTXISD::Suld2DArrayI8Clamp: 353 case NVPTXISD::Suld2DArrayI16Clamp: 354 case NVPTXISD::Suld2DArrayI32Clamp: 355 case NVPTXISD::Suld2DArrayI64Clamp: 356 case NVPTXISD::Suld2DArrayV2I8Clamp: 357 case NVPTXISD::Suld2DArrayV2I16Clamp: 358 case NVPTXISD::Suld2DArrayV2I32Clamp: 359 case NVPTXISD::Suld2DArrayV2I64Clamp: 360 case NVPTXISD::Suld2DArrayV4I8Clamp: 361 case NVPTXISD::Suld2DArrayV4I16Clamp: 362 case NVPTXISD::Suld2DArrayV4I32Clamp: 363 case NVPTXISD::Suld3DI8Clamp: 364 case NVPTXISD::Suld3DI16Clamp: 365 case NVPTXISD::Suld3DI32Clamp: 366 case NVPTXISD::Suld3DI64Clamp: 367 case NVPTXISD::Suld3DV2I8Clamp: 368 case NVPTXISD::Suld3DV2I16Clamp: 369 case NVPTXISD::Suld3DV2I32Clamp: 370 case NVPTXISD::Suld3DV2I64Clamp: 371 case NVPTXISD::Suld3DV4I8Clamp: 372 case NVPTXISD::Suld3DV4I16Clamp: 373 case NVPTXISD::Suld3DV4I32Clamp: 374 case NVPTXISD::Suld1DI8Trap: 375 case NVPTXISD::Suld1DI16Trap: 376 case NVPTXISD::Suld1DI32Trap: 377 case NVPTXISD::Suld1DI64Trap: 378 case NVPTXISD::Suld1DV2I8Trap: 379 case NVPTXISD::Suld1DV2I16Trap: 380 case NVPTXISD::Suld1DV2I32Trap: 381 case NVPTXISD::Suld1DV2I64Trap: 382 case NVPTXISD::Suld1DV4I8Trap: 383 case NVPTXISD::Suld1DV4I16Trap: 384 case NVPTXISD::Suld1DV4I32Trap: 385 case NVPTXISD::Suld1DArrayI8Trap: 386 case NVPTXISD::Suld1DArrayI16Trap: 387 case NVPTXISD::Suld1DArrayI32Trap: 388 case NVPTXISD::Suld1DArrayI64Trap: 389 case NVPTXISD::Suld1DArrayV2I8Trap: 390 case NVPTXISD::Suld1DArrayV2I16Trap: 391 case NVPTXISD::Suld1DArrayV2I32Trap: 392 case NVPTXISD::Suld1DArrayV2I64Trap: 393 case NVPTXISD::Suld1DArrayV4I8Trap: 394 case NVPTXISD::Suld1DArrayV4I16Trap: 395 case NVPTXISD::Suld1DArrayV4I32Trap: 396 case NVPTXISD::Suld2DI8Trap: 397 case NVPTXISD::Suld2DI16Trap: 398 case NVPTXISD::Suld2DI32Trap: 399 case NVPTXISD::Suld2DI64Trap: 400 case NVPTXISD::Suld2DV2I8Trap: 401 case NVPTXISD::Suld2DV2I16Trap: 402 case NVPTXISD::Suld2DV2I32Trap: 403 case NVPTXISD::Suld2DV2I64Trap: 404 case NVPTXISD::Suld2DV4I8Trap: 405 case NVPTXISD::Suld2DV4I16Trap: 406 case NVPTXISD::Suld2DV4I32Trap: 407 case NVPTXISD::Suld2DArrayI8Trap: 408 case NVPTXISD::Suld2DArrayI16Trap: 409 case NVPTXISD::Suld2DArrayI32Trap: 410 case NVPTXISD::Suld2DArrayI64Trap: 411 case NVPTXISD::Suld2DArrayV2I8Trap: 412 case NVPTXISD::Suld2DArrayV2I16Trap: 413 case NVPTXISD::Suld2DArrayV2I32Trap: 414 case NVPTXISD::Suld2DArrayV2I64Trap: 415 case NVPTXISD::Suld2DArrayV4I8Trap: 416 case NVPTXISD::Suld2DArrayV4I16Trap: 417 case NVPTXISD::Suld2DArrayV4I32Trap: 418 case NVPTXISD::Suld3DI8Trap: 419 case NVPTXISD::Suld3DI16Trap: 420 case NVPTXISD::Suld3DI32Trap: 421 case NVPTXISD::Suld3DI64Trap: 422 case NVPTXISD::Suld3DV2I8Trap: 423 case NVPTXISD::Suld3DV2I16Trap: 424 case NVPTXISD::Suld3DV2I32Trap: 425 case NVPTXISD::Suld3DV2I64Trap: 426 case NVPTXISD::Suld3DV4I8Trap: 427 case NVPTXISD::Suld3DV4I16Trap: 428 case NVPTXISD::Suld3DV4I32Trap: 429 case NVPTXISD::Suld1DI8Zero: 430 case NVPTXISD::Suld1DI16Zero: 431 case NVPTXISD::Suld1DI32Zero: 432 case NVPTXISD::Suld1DI64Zero: 433 case NVPTXISD::Suld1DV2I8Zero: 434 case NVPTXISD::Suld1DV2I16Zero: 435 case NVPTXISD::Suld1DV2I32Zero: 436 case NVPTXISD::Suld1DV2I64Zero: 437 case NVPTXISD::Suld1DV4I8Zero: 438 case NVPTXISD::Suld1DV4I16Zero: 439 case NVPTXISD::Suld1DV4I32Zero: 440 case NVPTXISD::Suld1DArrayI8Zero: 441 case NVPTXISD::Suld1DArrayI16Zero: 442 case NVPTXISD::Suld1DArrayI32Zero: 443 case NVPTXISD::Suld1DArrayI64Zero: 444 case NVPTXISD::Suld1DArrayV2I8Zero: 445 case NVPTXISD::Suld1DArrayV2I16Zero: 446 case NVPTXISD::Suld1DArrayV2I32Zero: 447 case NVPTXISD::Suld1DArrayV2I64Zero: 448 case NVPTXISD::Suld1DArrayV4I8Zero: 449 case NVPTXISD::Suld1DArrayV4I16Zero: 450 case NVPTXISD::Suld1DArrayV4I32Zero: 451 case NVPTXISD::Suld2DI8Zero: 452 case NVPTXISD::Suld2DI16Zero: 453 case NVPTXISD::Suld2DI32Zero: 454 case NVPTXISD::Suld2DI64Zero: 455 case NVPTXISD::Suld2DV2I8Zero: 456 case NVPTXISD::Suld2DV2I16Zero: 457 case NVPTXISD::Suld2DV2I32Zero: 458 case NVPTXISD::Suld2DV2I64Zero: 459 case NVPTXISD::Suld2DV4I8Zero: 460 case NVPTXISD::Suld2DV4I16Zero: 461 case NVPTXISD::Suld2DV4I32Zero: 462 case NVPTXISD::Suld2DArrayI8Zero: 463 case NVPTXISD::Suld2DArrayI16Zero: 464 case NVPTXISD::Suld2DArrayI32Zero: 465 case NVPTXISD::Suld2DArrayI64Zero: 466 case NVPTXISD::Suld2DArrayV2I8Zero: 467 case NVPTXISD::Suld2DArrayV2I16Zero: 468 case NVPTXISD::Suld2DArrayV2I32Zero: 469 case NVPTXISD::Suld2DArrayV2I64Zero: 470 case NVPTXISD::Suld2DArrayV4I8Zero: 471 case NVPTXISD::Suld2DArrayV4I16Zero: 472 case NVPTXISD::Suld2DArrayV4I32Zero: 473 case NVPTXISD::Suld3DI8Zero: 474 case NVPTXISD::Suld3DI16Zero: 475 case NVPTXISD::Suld3DI32Zero: 476 case NVPTXISD::Suld3DI64Zero: 477 case NVPTXISD::Suld3DV2I8Zero: 478 case NVPTXISD::Suld3DV2I16Zero: 479 case NVPTXISD::Suld3DV2I32Zero: 480 case NVPTXISD::Suld3DV2I64Zero: 481 case NVPTXISD::Suld3DV4I8Zero: 482 case NVPTXISD::Suld3DV4I16Zero: 483 case NVPTXISD::Suld3DV4I32Zero: 484 if (trySurfaceIntrinsic(N)) 485 return; 486 break; 487 case ISD::AND: 488 case ISD::SRA: 489 case ISD::SRL: 490 // Try to select BFE 491 if (tryBFE(N)) 492 return; 493 break; 494 case ISD::ADDRSPACECAST: 495 SelectAddrSpaceCast(N); 496 return; 497 case ISD::ConstantFP: 498 if (tryConstantFP16(N)) 499 return; 500 break; 501 default: 502 break; 503 } 504 SelectCode(N); 505 } 506 507 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { 508 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 509 switch (IID) { 510 default: 511 return false; 512 case Intrinsic::nvvm_ldg_global_f: 513 case Intrinsic::nvvm_ldg_global_i: 514 case Intrinsic::nvvm_ldg_global_p: 515 case Intrinsic::nvvm_ldu_global_f: 516 case Intrinsic::nvvm_ldu_global_i: 517 case Intrinsic::nvvm_ldu_global_p: 518 return tryLDGLDU(N); 519 } 520 } 521 522 // There's no way to specify FP16 immediates in .f16 ops, so we have to 523 // load them into an .f16 register first. 524 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) { 525 if (N->getValueType(0) != MVT::f16) 526 return false; 527 SDValue Val = CurDAG->getTargetConstantFP( 528 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16); 529 SDNode *LoadConstF16 = 530 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val); 531 ReplaceNode(N, LoadConstF16); 532 return true; 533 } 534 535 // Map ISD:CONDCODE value to appropriate CmpMode expected by 536 // NVPTXInstPrinter::printCmpMode() 537 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) { 538 using NVPTX::PTXCmpMode::CmpMode; 539 unsigned PTXCmpMode = [](ISD::CondCode CC) { 540 switch (CC) { 541 default: 542 llvm_unreachable("Unexpected condition code."); 543 case ISD::SETOEQ: 544 return CmpMode::EQ; 545 case ISD::SETOGT: 546 return CmpMode::GT; 547 case ISD::SETOGE: 548 return CmpMode::GE; 549 case ISD::SETOLT: 550 return CmpMode::LT; 551 case ISD::SETOLE: 552 return CmpMode::LE; 553 case ISD::SETONE: 554 return CmpMode::NE; 555 case ISD::SETO: 556 return CmpMode::NUM; 557 case ISD::SETUO: 558 return CmpMode::NotANumber; 559 case ISD::SETUEQ: 560 return CmpMode::EQU; 561 case ISD::SETUGT: 562 return CmpMode::GTU; 563 case ISD::SETUGE: 564 return CmpMode::GEU; 565 case ISD::SETULT: 566 return CmpMode::LTU; 567 case ISD::SETULE: 568 return CmpMode::LEU; 569 case ISD::SETUNE: 570 return CmpMode::NEU; 571 case ISD::SETEQ: 572 return CmpMode::EQ; 573 case ISD::SETGT: 574 return CmpMode::GT; 575 case ISD::SETGE: 576 return CmpMode::GE; 577 case ISD::SETLT: 578 return CmpMode::LT; 579 case ISD::SETLE: 580 return CmpMode::LE; 581 case ISD::SETNE: 582 return CmpMode::NE; 583 } 584 }(CondCode.get()); 585 586 if (FTZ) 587 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG; 588 589 return PTXCmpMode; 590 } 591 592 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) { 593 unsigned PTXCmpMode = 594 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ()); 595 SDLoc DL(N); 596 SDNode *SetP = CurDAG->getMachineNode( 597 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0), 598 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32)); 599 ReplaceNode(N, SetP); 600 return true; 601 } 602 603 // Find all instances of extract_vector_elt that use this v2f16 vector 604 // and coalesce them into a scattering move instruction. 605 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) { 606 SDValue Vector = N->getOperand(0); 607 608 // We only care about f16x2 as it's the only real vector type we 609 // need to deal with. 610 if (Vector.getSimpleValueType() != MVT::v2f16) 611 return false; 612 613 // Find and record all uses of this vector that extract element 0 or 1. 614 SmallVector<SDNode *, 4> E0, E1; 615 for (auto U : Vector.getNode()->uses()) { 616 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 617 continue; 618 if (U->getOperand(0) != Vector) 619 continue; 620 if (const ConstantSDNode *IdxConst = 621 dyn_cast<ConstantSDNode>(U->getOperand(1))) { 622 if (IdxConst->getZExtValue() == 0) 623 E0.push_back(U); 624 else if (IdxConst->getZExtValue() == 1) 625 E1.push_back(U); 626 else 627 llvm_unreachable("Invalid vector index."); 628 } 629 } 630 631 // There's no point scattering f16x2 if we only ever access one 632 // element of it. 633 if (E0.empty() || E1.empty()) 634 return false; 635 636 unsigned Op = NVPTX::SplitF16x2; 637 // If the vector has been BITCAST'ed from i32, we can use original 638 // value directly and avoid register-to-register move. 639 SDValue Source = Vector; 640 if (Vector->getOpcode() == ISD::BITCAST) { 641 Op = NVPTX::SplitI32toF16x2; 642 Source = Vector->getOperand(0); 643 } 644 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1)) 645 // into f16,f16 SplitF16x2(V) 646 SDNode *ScatterOp = 647 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source); 648 for (auto *Node : E0) 649 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0)); 650 for (auto *Node : E1) 651 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1)); 652 653 return true; 654 } 655 656 static unsigned int getCodeAddrSpace(MemSDNode *N) { 657 const Value *Src = N->getMemOperand()->getValue(); 658 659 if (!Src) 660 return NVPTX::PTXLdStInstCode::GENERIC; 661 662 if (auto *PT = dyn_cast<PointerType>(Src->getType())) { 663 switch (PT->getAddressSpace()) { 664 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; 665 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; 666 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; 667 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; 668 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; 669 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; 670 default: break; 671 } 672 } 673 return NVPTX::PTXLdStInstCode::GENERIC; 674 } 675 676 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, 677 unsigned CodeAddrSpace, MachineFunction *F) { 678 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address 679 // space. 680 // 681 // We have two ways of identifying invariant loads: Loads may be explicitly 682 // marked as invariant, or we may infer them to be invariant. 683 // 684 // We currently infer invariance for loads from 685 // - constant global variables, and 686 // - kernel function pointer params that are noalias (i.e. __restrict) and 687 // never written to. 688 // 689 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally 690 // not during the SelectionDAG phase). 691 // 692 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for 693 // explicitly invariant loads because these are how clang tells us to use ldg 694 // when the user uses a builtin. 695 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) 696 return false; 697 698 if (N->isInvariant()) 699 return true; 700 701 bool IsKernelFn = isKernelFunction(F->getFunction()); 702 703 // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly 704 // because the former looks through phi nodes while the latter does not. We 705 // need to look through phi nodes to handle pointer induction variables. 706 SmallVector<const Value *, 8> Objs; 707 GetUnderlyingObjects(N->getMemOperand()->getValue(), 708 Objs, F->getDataLayout()); 709 710 return all_of(Objs, [&](const Value *V) { 711 if (auto *A = dyn_cast<const Argument>(V)) 712 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr(); 713 if (auto *GV = dyn_cast<const GlobalVariable>(V)) 714 return GV->isConstant(); 715 return false; 716 }); 717 } 718 719 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { 720 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 721 switch (IID) { 722 default: 723 return false; 724 case Intrinsic::nvvm_texsurf_handle_internal: 725 SelectTexSurfHandle(N); 726 return true; 727 } 728 } 729 730 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { 731 // Op 0 is the intrinsic ID 732 SDValue Wrapper = N->getOperand(1); 733 SDValue GlobalVal = Wrapper.getOperand(0); 734 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), 735 MVT::i64, GlobalVal)); 736 } 737 738 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 739 SDValue Src = N->getOperand(0); 740 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); 741 unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); 742 unsigned DstAddrSpace = CastN->getDestAddressSpace(); 743 assert(SrcAddrSpace != DstAddrSpace && 744 "addrspacecast must be between different address spaces"); 745 746 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { 747 // Specific to generic 748 unsigned Opc; 749 switch (SrcAddrSpace) { 750 default: report_fatal_error("Bad address space in addrspacecast"); 751 case ADDRESS_SPACE_GLOBAL: 752 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; 753 break; 754 case ADDRESS_SPACE_SHARED: 755 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432 756 : NVPTX::cvta_shared_yes_64) 757 : NVPTX::cvta_shared_yes; 758 break; 759 case ADDRESS_SPACE_CONST: 760 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432 761 : NVPTX::cvta_const_yes_64) 762 : NVPTX::cvta_const_yes; 763 break; 764 case ADDRESS_SPACE_LOCAL: 765 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432 766 : NVPTX::cvta_local_yes_64) 767 : NVPTX::cvta_local_yes; 768 break; 769 } 770 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 771 Src)); 772 return; 773 } else { 774 // Generic to specific 775 if (SrcAddrSpace != 0) 776 report_fatal_error("Cannot cast between two non-generic address spaces"); 777 unsigned Opc; 778 switch (DstAddrSpace) { 779 default: report_fatal_error("Bad address space in addrspacecast"); 780 case ADDRESS_SPACE_GLOBAL: 781 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 782 : NVPTX::cvta_to_global_yes; 783 break; 784 case ADDRESS_SPACE_SHARED: 785 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264 786 : NVPTX::cvta_to_shared_yes_64) 787 : NVPTX::cvta_to_shared_yes; 788 break; 789 case ADDRESS_SPACE_CONST: 790 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264 791 : NVPTX::cvta_to_const_yes_64) 792 : NVPTX::cvta_to_const_yes; 793 break; 794 case ADDRESS_SPACE_LOCAL: 795 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264 796 : NVPTX::cvta_to_local_yes_64) 797 : NVPTX::cvta_to_local_yes; 798 break; 799 case ADDRESS_SPACE_PARAM: 800 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 801 : NVPTX::nvvm_ptr_gen_to_param; 802 break; 803 } 804 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 805 Src)); 806 return; 807 } 808 } 809 810 // Helper function template to reduce amount of boilerplate code for 811 // opcode selection. 812 static Optional<unsigned> pickOpcodeForVT( 813 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, 814 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16, 815 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) { 816 switch (VT) { 817 case MVT::i1: 818 case MVT::i8: 819 return Opcode_i8; 820 case MVT::i16: 821 return Opcode_i16; 822 case MVT::i32: 823 return Opcode_i32; 824 case MVT::i64: 825 return Opcode_i64; 826 case MVT::f16: 827 return Opcode_f16; 828 case MVT::v2f16: 829 return Opcode_f16x2; 830 case MVT::f32: 831 return Opcode_f32; 832 case MVT::f64: 833 return Opcode_f64; 834 default: 835 return None; 836 } 837 } 838 839 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { 840 SDLoc dl(N); 841 MemSDNode *LD = cast<MemSDNode>(N); 842 assert(LD->readMem() && "Expected load"); 843 LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N); 844 EVT LoadedVT = LD->getMemoryVT(); 845 SDNode *NVPTXLD = nullptr; 846 847 // do not support pre/post inc/dec 848 if (PlainLoad && PlainLoad->isIndexed()) 849 return false; 850 851 if (!LoadedVT.isSimple()) 852 return false; 853 854 AtomicOrdering Ordering = LD->getOrdering(); 855 // In order to lower atomic loads with stronger guarantees we would need to 856 // use load.acquire or insert fences. However these features were only added 857 // with PTX ISA 6.0 / sm_70. 858 // TODO: Check if we can actually use the new instructions and implement them. 859 if (isStrongerThanMonotonic(Ordering)) 860 return false; 861 862 // Address Space Setting 863 unsigned int CodeAddrSpace = getCodeAddrSpace(LD); 864 if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) { 865 return tryLDGLDU(N); 866 } 867 868 unsigned int PointerSize = 869 CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace()); 870 871 // Volatile Setting 872 // - .volatile is only available for .global and .shared 873 // - .volatile has the same memory synchronization semantics as .relaxed.sys 874 bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic; 875 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 876 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 877 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 878 isVolatile = false; 879 880 // Type Setting: fromType + fromTypeWidth 881 // 882 // Sign : ISD::SEXTLOAD 883 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 884 // type is integer 885 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 886 MVT SimpleVT = LoadedVT.getSimpleVT(); 887 MVT ScalarVT = SimpleVT.getScalarType(); 888 // Read at least 8 bits (predicates are stored as 8-bit values) 889 unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits()); 890 unsigned int fromType; 891 892 // Vector Setting 893 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 894 if (SimpleVT.isVector()) { 895 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type"); 896 // v2f16 is loaded using ld.b32 897 fromTypeWidth = 32; 898 } 899 900 if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD)) 901 fromType = NVPTX::PTXLdStInstCode::Signed; 902 else if (ScalarVT.isFloatingPoint()) 903 // f16 uses .b16 as its storage type. 904 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped 905 : NVPTX::PTXLdStInstCode::Float; 906 else 907 fromType = NVPTX::PTXLdStInstCode::Unsigned; 908 909 // Create the machine instruction DAG 910 SDValue Chain = N->getOperand(0); 911 SDValue N1 = N->getOperand(1); 912 SDValue Addr; 913 SDValue Offset, Base; 914 Optional<unsigned> Opcode; 915 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; 916 917 if (SelectDirectAddr(N1, Addr)) { 918 Opcode = pickOpcodeForVT( 919 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar, 920 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar, 921 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); 922 if (!Opcode) 923 return false; 924 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), 925 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 926 getI32Imm(fromTypeWidth, dl), Addr, Chain }; 927 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, 928 MVT::Other, Ops); 929 } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) 930 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { 931 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, 932 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, 933 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi, 934 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); 935 if (!Opcode) 936 return false; 937 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), 938 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 939 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 940 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, 941 MVT::Other, Ops); 942 } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset) 943 : SelectADDRri(N1.getNode(), N1, Base, Offset)) { 944 if (PointerSize == 64) 945 Opcode = pickOpcodeForVT( 946 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, 947 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64, 948 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); 949 else 950 Opcode = pickOpcodeForVT( 951 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari, 952 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari, 953 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); 954 if (!Opcode) 955 return false; 956 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), 957 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 958 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 959 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, 960 MVT::Other, Ops); 961 } else { 962 if (PointerSize == 64) 963 Opcode = pickOpcodeForVT( 964 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64, 965 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64, 966 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64, 967 NVPTX::LD_f64_areg_64); 968 else 969 Opcode = pickOpcodeForVT( 970 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg, 971 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg, 972 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg); 973 if (!Opcode) 974 return false; 975 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), 976 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 977 getI32Imm(fromTypeWidth, dl), N1, Chain }; 978 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, 979 MVT::Other, Ops); 980 } 981 982 if (!NVPTXLD) 983 return false; 984 985 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 986 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef}); 987 988 ReplaceNode(N, NVPTXLD); 989 return true; 990 } 991 992 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { 993 994 SDValue Chain = N->getOperand(0); 995 SDValue Op1 = N->getOperand(1); 996 SDValue Addr, Offset, Base; 997 Optional<unsigned> Opcode; 998 SDLoc DL(N); 999 SDNode *LD; 1000 MemSDNode *MemSD = cast<MemSDNode>(N); 1001 EVT LoadedVT = MemSD->getMemoryVT(); 1002 1003 if (!LoadedVT.isSimple()) 1004 return false; 1005 1006 // Address Space Setting 1007 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); 1008 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { 1009 return tryLDGLDU(N); 1010 } 1011 1012 unsigned int PointerSize = 1013 CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); 1014 1015 // Volatile Setting 1016 // - .volatile is only availalble for .global and .shared 1017 bool IsVolatile = MemSD->isVolatile(); 1018 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 1019 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 1020 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 1021 IsVolatile = false; 1022 1023 // Vector Setting 1024 MVT SimpleVT = LoadedVT.getSimpleVT(); 1025 1026 // Type Setting: fromType + fromTypeWidth 1027 // 1028 // Sign : ISD::SEXTLOAD 1029 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 1030 // type is integer 1031 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 1032 MVT ScalarVT = SimpleVT.getScalarType(); 1033 // Read at least 8 bits (predicates are stored as 8-bit values) 1034 unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits()); 1035 unsigned int FromType; 1036 // The last operand holds the original LoadSDNode::getExtensionType() value 1037 unsigned ExtensionType = cast<ConstantSDNode>( 1038 N->getOperand(N->getNumOperands() - 1))->getZExtValue(); 1039 if (ExtensionType == ISD::SEXTLOAD) 1040 FromType = NVPTX::PTXLdStInstCode::Signed; 1041 else if (ScalarVT.isFloatingPoint()) 1042 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped 1043 : NVPTX::PTXLdStInstCode::Float; 1044 else 1045 FromType = NVPTX::PTXLdStInstCode::Unsigned; 1046 1047 unsigned VecType; 1048 1049 switch (N->getOpcode()) { 1050 case NVPTXISD::LoadV2: 1051 VecType = NVPTX::PTXLdStInstCode::V2; 1052 break; 1053 case NVPTXISD::LoadV4: 1054 VecType = NVPTX::PTXLdStInstCode::V4; 1055 break; 1056 default: 1057 return false; 1058 } 1059 1060 EVT EltVT = N->getValueType(0); 1061 1062 // v8f16 is a special case. PTX doesn't have ld.v8.f16 1063 // instruction. Instead, we split the vector into v2f16 chunks and 1064 // load them with ld.v4.b32. 1065 if (EltVT == MVT::v2f16) { 1066 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode."); 1067 EltVT = MVT::i32; 1068 FromType = NVPTX::PTXLdStInstCode::Untyped; 1069 FromTypeWidth = 32; 1070 } 1071 1072 if (SelectDirectAddr(Op1, Addr)) { 1073 switch (N->getOpcode()) { 1074 default: 1075 return false; 1076 case NVPTXISD::LoadV2: 1077 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1078 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar, 1079 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar, 1080 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar, 1081 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar); 1082 break; 1083 case NVPTXISD::LoadV4: 1084 Opcode = 1085 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar, 1086 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None, 1087 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, 1088 NVPTX::LDV_f32_v4_avar, None); 1089 break; 1090 } 1091 if (!Opcode) 1092 return false; 1093 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1094 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1095 getI32Imm(FromTypeWidth, DL), Addr, Chain }; 1096 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); 1097 } else if (PointerSize == 64 1098 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) 1099 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { 1100 switch (N->getOpcode()) { 1101 default: 1102 return false; 1103 case NVPTXISD::LoadV2: 1104 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1105 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, 1106 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, 1107 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi, 1108 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); 1109 break; 1110 case NVPTXISD::LoadV4: 1111 Opcode = 1112 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, 1113 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None, 1114 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, 1115 NVPTX::LDV_f32_v4_asi, None); 1116 break; 1117 } 1118 if (!Opcode) 1119 return false; 1120 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1121 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1122 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1123 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); 1124 } else if (PointerSize == 64 1125 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1126 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1127 if (PointerSize == 64) { 1128 switch (N->getOpcode()) { 1129 default: 1130 return false; 1131 case NVPTXISD::LoadV2: 1132 Opcode = pickOpcodeForVT( 1133 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64, 1134 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64, 1135 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64, 1136 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64, 1137 NVPTX::LDV_f64_v2_ari_64); 1138 break; 1139 case NVPTXISD::LoadV4: 1140 Opcode = pickOpcodeForVT( 1141 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, 1142 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None, 1143 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64, 1144 NVPTX::LDV_f32_v4_ari_64, None); 1145 break; 1146 } 1147 } else { 1148 switch (N->getOpcode()) { 1149 default: 1150 return false; 1151 case NVPTXISD::LoadV2: 1152 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1153 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, 1154 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, 1155 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari, 1156 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); 1157 break; 1158 case NVPTXISD::LoadV4: 1159 Opcode = 1160 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, 1161 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None, 1162 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, 1163 NVPTX::LDV_f32_v4_ari, None); 1164 break; 1165 } 1166 } 1167 if (!Opcode) 1168 return false; 1169 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1170 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1171 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1172 1173 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); 1174 } else { 1175 if (PointerSize == 64) { 1176 switch (N->getOpcode()) { 1177 default: 1178 return false; 1179 case NVPTXISD::LoadV2: 1180 Opcode = pickOpcodeForVT( 1181 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64, 1182 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64, 1183 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64, 1184 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64, 1185 NVPTX::LDV_f64_v2_areg_64); 1186 break; 1187 case NVPTXISD::LoadV4: 1188 Opcode = pickOpcodeForVT( 1189 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64, 1190 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None, 1191 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64, 1192 NVPTX::LDV_f32_v4_areg_64, None); 1193 break; 1194 } 1195 } else { 1196 switch (N->getOpcode()) { 1197 default: 1198 return false; 1199 case NVPTXISD::LoadV2: 1200 Opcode = 1201 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg, 1202 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg, 1203 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg, 1204 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg, 1205 NVPTX::LDV_f64_v2_areg); 1206 break; 1207 case NVPTXISD::LoadV4: 1208 Opcode = pickOpcodeForVT( 1209 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, 1210 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None, 1211 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg, 1212 NVPTX::LDV_f32_v4_areg, None); 1213 break; 1214 } 1215 } 1216 if (!Opcode) 1217 return false; 1218 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1219 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1220 getI32Imm(FromTypeWidth, DL), Op1, Chain }; 1221 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); 1222 } 1223 1224 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 1225 CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef}); 1226 1227 ReplaceNode(N, LD); 1228 return true; 1229 } 1230 1231 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { 1232 1233 SDValue Chain = N->getOperand(0); 1234 SDValue Op1; 1235 MemSDNode *Mem; 1236 bool IsLDG = true; 1237 1238 // If this is an LDG intrinsic, the address is the third operand. If its an 1239 // LDG/LDU SD node (from custom vector handling), then its the second operand 1240 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 1241 Op1 = N->getOperand(2); 1242 Mem = cast<MemIntrinsicSDNode>(N); 1243 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 1244 switch (IID) { 1245 default: 1246 return false; 1247 case Intrinsic::nvvm_ldg_global_f: 1248 case Intrinsic::nvvm_ldg_global_i: 1249 case Intrinsic::nvvm_ldg_global_p: 1250 IsLDG = true; 1251 break; 1252 case Intrinsic::nvvm_ldu_global_f: 1253 case Intrinsic::nvvm_ldu_global_i: 1254 case Intrinsic::nvvm_ldu_global_p: 1255 IsLDG = false; 1256 break; 1257 } 1258 } else { 1259 Op1 = N->getOperand(1); 1260 Mem = cast<MemSDNode>(N); 1261 } 1262 1263 Optional<unsigned> Opcode; 1264 SDLoc DL(N); 1265 SDNode *LD; 1266 SDValue Base, Offset, Addr; 1267 1268 EVT EltVT = Mem->getMemoryVT(); 1269 unsigned NumElts = 1; 1270 if (EltVT.isVector()) { 1271 NumElts = EltVT.getVectorNumElements(); 1272 EltVT = EltVT.getVectorElementType(); 1273 // vectors of f16 are loaded/stored as multiples of v2f16 elements. 1274 if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) { 1275 assert(NumElts % 2 == 0 && "Vector must have even number of elements"); 1276 EltVT = MVT::v2f16; 1277 NumElts /= 2; 1278 } 1279 } 1280 1281 // Build the "promoted" result VTList for the load. If we are really loading 1282 // i8s, then the return type will be promoted to i16 since we do not expose 1283 // 8-bit registers in NVPTX. 1284 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT; 1285 SmallVector<EVT, 5> InstVTs; 1286 for (unsigned i = 0; i != NumElts; ++i) { 1287 InstVTs.push_back(NodeVT); 1288 } 1289 InstVTs.push_back(MVT::Other); 1290 SDVTList InstVTList = CurDAG->getVTList(InstVTs); 1291 1292 if (SelectDirectAddr(Op1, Addr)) { 1293 switch (N->getOpcode()) { 1294 default: 1295 return false; 1296 case ISD::LOAD: 1297 case ISD::INTRINSIC_W_CHAIN: 1298 if (IsLDG) 1299 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1300 NVPTX::INT_PTX_LDG_GLOBAL_i8avar, 1301 NVPTX::INT_PTX_LDG_GLOBAL_i16avar, 1302 NVPTX::INT_PTX_LDG_GLOBAL_i32avar, 1303 NVPTX::INT_PTX_LDG_GLOBAL_i64avar, 1304 NVPTX::INT_PTX_LDG_GLOBAL_f16avar, 1305 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar, 1306 NVPTX::INT_PTX_LDG_GLOBAL_f32avar, 1307 NVPTX::INT_PTX_LDG_GLOBAL_f64avar); 1308 else 1309 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1310 NVPTX::INT_PTX_LDU_GLOBAL_i8avar, 1311 NVPTX::INT_PTX_LDU_GLOBAL_i16avar, 1312 NVPTX::INT_PTX_LDU_GLOBAL_i32avar, 1313 NVPTX::INT_PTX_LDU_GLOBAL_i64avar, 1314 NVPTX::INT_PTX_LDU_GLOBAL_f16avar, 1315 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar, 1316 NVPTX::INT_PTX_LDU_GLOBAL_f32avar, 1317 NVPTX::INT_PTX_LDU_GLOBAL_f64avar); 1318 break; 1319 case NVPTXISD::LoadV2: 1320 case NVPTXISD::LDGV2: 1321 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1322 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, 1323 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, 1324 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, 1325 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, 1326 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar, 1327 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar, 1328 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, 1329 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar); 1330 break; 1331 case NVPTXISD::LDUV2: 1332 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1333 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, 1334 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, 1335 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, 1336 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, 1337 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar, 1338 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar, 1339 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, 1340 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); 1341 break; 1342 case NVPTXISD::LoadV4: 1343 case NVPTXISD::LDGV4: 1344 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1345 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, 1346 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, 1347 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None, 1348 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, 1349 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, 1350 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None); 1351 break; 1352 case NVPTXISD::LDUV4: 1353 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1354 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, 1355 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, 1356 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None, 1357 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, 1358 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, 1359 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None); 1360 break; 1361 } 1362 if (!Opcode) 1363 return false; 1364 SDValue Ops[] = { Addr, Chain }; 1365 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); 1366 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1367 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1368 if (TM.is64Bit()) { 1369 switch (N->getOpcode()) { 1370 default: 1371 return false; 1372 case ISD::LOAD: 1373 case ISD::INTRINSIC_W_CHAIN: 1374 if (IsLDG) 1375 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1376 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, 1377 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, 1378 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, 1379 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, 1380 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64, 1381 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64, 1382 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, 1383 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); 1384 else 1385 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1386 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, 1387 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, 1388 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, 1389 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, 1390 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64, 1391 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64, 1392 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, 1393 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); 1394 break; 1395 case NVPTXISD::LoadV2: 1396 case NVPTXISD::LDGV2: 1397 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1398 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, 1399 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, 1400 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, 1401 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, 1402 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64, 1403 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64, 1404 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, 1405 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); 1406 break; 1407 case NVPTXISD::LDUV2: 1408 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1409 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, 1410 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, 1411 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, 1412 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, 1413 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64, 1414 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64, 1415 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, 1416 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); 1417 break; 1418 case NVPTXISD::LoadV4: 1419 case NVPTXISD::LDGV4: 1420 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1421 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, 1422 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, 1423 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None, 1424 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, 1425 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, 1426 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None); 1427 break; 1428 case NVPTXISD::LDUV4: 1429 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1430 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, 1431 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, 1432 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None, 1433 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, 1434 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, 1435 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None); 1436 break; 1437 } 1438 } else { 1439 switch (N->getOpcode()) { 1440 default: 1441 return false; 1442 case ISD::LOAD: 1443 case ISD::INTRINSIC_W_CHAIN: 1444 if (IsLDG) 1445 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1446 NVPTX::INT_PTX_LDG_GLOBAL_i8ari, 1447 NVPTX::INT_PTX_LDG_GLOBAL_i16ari, 1448 NVPTX::INT_PTX_LDG_GLOBAL_i32ari, 1449 NVPTX::INT_PTX_LDG_GLOBAL_i64ari, 1450 NVPTX::INT_PTX_LDG_GLOBAL_f16ari, 1451 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari, 1452 NVPTX::INT_PTX_LDG_GLOBAL_f32ari, 1453 NVPTX::INT_PTX_LDG_GLOBAL_f64ari); 1454 else 1455 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1456 NVPTX::INT_PTX_LDU_GLOBAL_i8ari, 1457 NVPTX::INT_PTX_LDU_GLOBAL_i16ari, 1458 NVPTX::INT_PTX_LDU_GLOBAL_i32ari, 1459 NVPTX::INT_PTX_LDU_GLOBAL_i64ari, 1460 NVPTX::INT_PTX_LDU_GLOBAL_f16ari, 1461 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari, 1462 NVPTX::INT_PTX_LDU_GLOBAL_f32ari, 1463 NVPTX::INT_PTX_LDU_GLOBAL_f64ari); 1464 break; 1465 case NVPTXISD::LoadV2: 1466 case NVPTXISD::LDGV2: 1467 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1468 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, 1469 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, 1470 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, 1471 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, 1472 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32, 1473 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32, 1474 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, 1475 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); 1476 break; 1477 case NVPTXISD::LDUV2: 1478 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1479 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, 1480 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, 1481 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, 1482 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, 1483 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32, 1484 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32, 1485 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, 1486 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); 1487 break; 1488 case NVPTXISD::LoadV4: 1489 case NVPTXISD::LDGV4: 1490 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1491 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, 1492 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, 1493 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None, 1494 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, 1495 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, 1496 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None); 1497 break; 1498 case NVPTXISD::LDUV4: 1499 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1500 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, 1501 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, 1502 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None, 1503 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, 1504 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, 1505 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None); 1506 break; 1507 } 1508 } 1509 if (!Opcode) 1510 return false; 1511 SDValue Ops[] = {Base, Offset, Chain}; 1512 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); 1513 } else { 1514 if (TM.is64Bit()) { 1515 switch (N->getOpcode()) { 1516 default: 1517 return false; 1518 case ISD::LOAD: 1519 case ISD::INTRINSIC_W_CHAIN: 1520 if (IsLDG) 1521 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1522 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, 1523 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, 1524 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, 1525 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, 1526 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64, 1527 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64, 1528 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, 1529 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); 1530 else 1531 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1532 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, 1533 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, 1534 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, 1535 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, 1536 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64, 1537 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64, 1538 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, 1539 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); 1540 break; 1541 case NVPTXISD::LoadV2: 1542 case NVPTXISD::LDGV2: 1543 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1544 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64, 1545 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, 1546 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64, 1547 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64, 1548 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64, 1549 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64, 1550 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64, 1551 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64); 1552 break; 1553 case NVPTXISD::LDUV2: 1554 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1555 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64, 1556 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64, 1557 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64, 1558 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64, 1559 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64, 1560 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64, 1561 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64, 1562 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64); 1563 break; 1564 case NVPTXISD::LoadV4: 1565 case NVPTXISD::LDGV4: 1566 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1567 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, 1568 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, 1569 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None, 1570 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, 1571 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, 1572 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None); 1573 break; 1574 case NVPTXISD::LDUV4: 1575 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1576 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, 1577 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, 1578 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None, 1579 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, 1580 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, 1581 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None); 1582 break; 1583 } 1584 } else { 1585 switch (N->getOpcode()) { 1586 default: 1587 return false; 1588 case ISD::LOAD: 1589 case ISD::INTRINSIC_W_CHAIN: 1590 if (IsLDG) 1591 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1592 NVPTX::INT_PTX_LDG_GLOBAL_i8areg, 1593 NVPTX::INT_PTX_LDG_GLOBAL_i16areg, 1594 NVPTX::INT_PTX_LDG_GLOBAL_i32areg, 1595 NVPTX::INT_PTX_LDG_GLOBAL_i64areg, 1596 NVPTX::INT_PTX_LDG_GLOBAL_f16areg, 1597 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg, 1598 NVPTX::INT_PTX_LDG_GLOBAL_f32areg, 1599 NVPTX::INT_PTX_LDG_GLOBAL_f64areg); 1600 else 1601 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1602 NVPTX::INT_PTX_LDU_GLOBAL_i8areg, 1603 NVPTX::INT_PTX_LDU_GLOBAL_i16areg, 1604 NVPTX::INT_PTX_LDU_GLOBAL_i32areg, 1605 NVPTX::INT_PTX_LDU_GLOBAL_i64areg, 1606 NVPTX::INT_PTX_LDU_GLOBAL_f16areg, 1607 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg, 1608 NVPTX::INT_PTX_LDU_GLOBAL_f32areg, 1609 NVPTX::INT_PTX_LDU_GLOBAL_f64areg); 1610 break; 1611 case NVPTXISD::LoadV2: 1612 case NVPTXISD::LDGV2: 1613 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1614 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32, 1615 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, 1616 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32, 1617 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32, 1618 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32, 1619 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32, 1620 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32, 1621 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32); 1622 break; 1623 case NVPTXISD::LDUV2: 1624 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1625 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32, 1626 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32, 1627 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32, 1628 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32, 1629 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32, 1630 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32, 1631 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32, 1632 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32); 1633 break; 1634 case NVPTXISD::LoadV4: 1635 case NVPTXISD::LDGV4: 1636 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1637 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, 1638 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, 1639 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None, 1640 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, 1641 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, 1642 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None); 1643 break; 1644 case NVPTXISD::LDUV4: 1645 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1646 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, 1647 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, 1648 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None, 1649 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, 1650 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, 1651 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None); 1652 break; 1653 } 1654 } 1655 if (!Opcode) 1656 return false; 1657 SDValue Ops[] = { Op1, Chain }; 1658 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); 1659 } 1660 1661 MachineMemOperand *MemRef = Mem->getMemOperand(); 1662 CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef}); 1663 1664 // For automatic generation of LDG (through SelectLoad[Vector], not the 1665 // intrinsics), we may have an extending load like: 1666 // 1667 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64 1668 // 1669 // In this case, the matching logic above will select a load for the original 1670 // memory type (in this case, i8) and our types will not match (the node needs 1671 // to return an i32 in this case). Our LDG/LDU nodes do not support the 1672 // concept of sign-/zero-extension, so emulate it here by adding an explicit 1673 // CVT instruction. Ptxas should clean up any redundancies here. 1674 1675 EVT OrigType = N->getValueType(0); 1676 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N); 1677 1678 if (OrigType != EltVT && LdNode) { 1679 // We have an extending-load. The instruction we selected operates on the 1680 // smaller type, but the SDNode we are replacing has the larger type. We 1681 // need to emit a CVT to make the types match. 1682 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD; 1683 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(), 1684 EltVT.getSimpleVT(), IsSigned); 1685 1686 // For each output value, apply the manual sign/zero-extension and make sure 1687 // all users of the load go through that CVT. 1688 for (unsigned i = 0; i != NumElts; ++i) { 1689 SDValue Res(LD, i); 1690 SDValue OrigVal(N, i); 1691 1692 SDNode *CvtNode = 1693 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res, 1694 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, 1695 DL, MVT::i32)); 1696 ReplaceUses(OrigVal, SDValue(CvtNode, 0)); 1697 } 1698 } 1699 1700 ReplaceNode(N, LD); 1701 return true; 1702 } 1703 1704 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { 1705 SDLoc dl(N); 1706 MemSDNode *ST = cast<MemSDNode>(N); 1707 assert(ST->writeMem() && "Expected store"); 1708 StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N); 1709 AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N); 1710 assert((PlainStore || AtomicStore) && "Expected store"); 1711 EVT StoreVT = ST->getMemoryVT(); 1712 SDNode *NVPTXST = nullptr; 1713 1714 // do not support pre/post inc/dec 1715 if (PlainStore && PlainStore->isIndexed()) 1716 return false; 1717 1718 if (!StoreVT.isSimple()) 1719 return false; 1720 1721 AtomicOrdering Ordering = ST->getOrdering(); 1722 // In order to lower atomic loads with stronger guarantees we would need to 1723 // use store.release or insert fences. However these features were only added 1724 // with PTX ISA 6.0 / sm_70. 1725 // TODO: Check if we can actually use the new instructions and implement them. 1726 if (isStrongerThanMonotonic(Ordering)) 1727 return false; 1728 1729 // Address Space Setting 1730 unsigned int CodeAddrSpace = getCodeAddrSpace(ST); 1731 unsigned int PointerSize = 1732 CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace()); 1733 1734 // Volatile Setting 1735 // - .volatile is only available for .global and .shared 1736 // - .volatile has the same memory synchronization semantics as .relaxed.sys 1737 bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic; 1738 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 1739 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 1740 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 1741 isVolatile = false; 1742 1743 // Vector Setting 1744 MVT SimpleVT = StoreVT.getSimpleVT(); 1745 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 1746 1747 // Type Setting: toType + toTypeWidth 1748 // - for integer type, always use 'u' 1749 // 1750 MVT ScalarVT = SimpleVT.getScalarType(); 1751 unsigned toTypeWidth = ScalarVT.getSizeInBits(); 1752 if (SimpleVT.isVector()) { 1753 assert(StoreVT == MVT::v2f16 && "Unexpected vector type"); 1754 // v2f16 is stored using st.b32 1755 toTypeWidth = 32; 1756 } 1757 1758 unsigned int toType; 1759 if (ScalarVT.isFloatingPoint()) 1760 // f16 uses .b16 as its storage type. 1761 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped 1762 : NVPTX::PTXLdStInstCode::Float; 1763 else 1764 toType = NVPTX::PTXLdStInstCode::Unsigned; 1765 1766 // Create the machine instruction DAG 1767 SDValue Chain = ST->getChain(); 1768 SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal(); 1769 SDValue BasePtr = ST->getBasePtr(); 1770 SDValue Addr; 1771 SDValue Offset, Base; 1772 Optional<unsigned> Opcode; 1773 MVT::SimpleValueType SourceVT = 1774 Value.getNode()->getSimpleValueType(0).SimpleTy; 1775 1776 if (SelectDirectAddr(BasePtr, Addr)) { 1777 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar, 1778 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar, 1779 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar, 1780 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar); 1781 if (!Opcode) 1782 return false; 1783 SDValue Ops[] = {Value, 1784 getI32Imm(isVolatile, dl), 1785 getI32Imm(CodeAddrSpace, dl), 1786 getI32Imm(vecType, dl), 1787 getI32Imm(toType, dl), 1788 getI32Imm(toTypeWidth, dl), 1789 Addr, 1790 Chain}; 1791 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); 1792 } else if (PointerSize == 64 1793 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset) 1794 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { 1795 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, 1796 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, 1797 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi, 1798 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); 1799 if (!Opcode) 1800 return false; 1801 SDValue Ops[] = {Value, 1802 getI32Imm(isVolatile, dl), 1803 getI32Imm(CodeAddrSpace, dl), 1804 getI32Imm(vecType, dl), 1805 getI32Imm(toType, dl), 1806 getI32Imm(toTypeWidth, dl), 1807 Base, 1808 Offset, 1809 Chain}; 1810 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); 1811 } else if (PointerSize == 64 1812 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset) 1813 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) { 1814 if (PointerSize == 64) 1815 Opcode = pickOpcodeForVT( 1816 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, 1817 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64, 1818 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); 1819 else 1820 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, 1821 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, 1822 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari, 1823 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); 1824 if (!Opcode) 1825 return false; 1826 1827 SDValue Ops[] = {Value, 1828 getI32Imm(isVolatile, dl), 1829 getI32Imm(CodeAddrSpace, dl), 1830 getI32Imm(vecType, dl), 1831 getI32Imm(toType, dl), 1832 getI32Imm(toTypeWidth, dl), 1833 Base, 1834 Offset, 1835 Chain}; 1836 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); 1837 } else { 1838 if (PointerSize == 64) 1839 Opcode = 1840 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64, 1841 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, 1842 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64, 1843 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64); 1844 else 1845 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg, 1846 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg, 1847 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg, 1848 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg); 1849 if (!Opcode) 1850 return false; 1851 SDValue Ops[] = {Value, 1852 getI32Imm(isVolatile, dl), 1853 getI32Imm(CodeAddrSpace, dl), 1854 getI32Imm(vecType, dl), 1855 getI32Imm(toType, dl), 1856 getI32Imm(toTypeWidth, dl), 1857 BasePtr, 1858 Chain}; 1859 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); 1860 } 1861 1862 if (!NVPTXST) 1863 return false; 1864 1865 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 1866 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef}); 1867 ReplaceNode(N, NVPTXST); 1868 return true; 1869 } 1870 1871 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { 1872 SDValue Chain = N->getOperand(0); 1873 SDValue Op1 = N->getOperand(1); 1874 SDValue Addr, Offset, Base; 1875 Optional<unsigned> Opcode; 1876 SDLoc DL(N); 1877 SDNode *ST; 1878 EVT EltVT = Op1.getValueType(); 1879 MemSDNode *MemSD = cast<MemSDNode>(N); 1880 EVT StoreVT = MemSD->getMemoryVT(); 1881 1882 // Address Space Setting 1883 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); 1884 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { 1885 report_fatal_error("Cannot store to pointer that points to constant " 1886 "memory space"); 1887 } 1888 unsigned int PointerSize = 1889 CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); 1890 1891 // Volatile Setting 1892 // - .volatile is only availalble for .global and .shared 1893 bool IsVolatile = MemSD->isVolatile(); 1894 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 1895 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 1896 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 1897 IsVolatile = false; 1898 1899 // Type Setting: toType + toTypeWidth 1900 // - for integer type, always use 'u' 1901 assert(StoreVT.isSimple() && "Store value is not simple"); 1902 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); 1903 unsigned ToTypeWidth = ScalarVT.getSizeInBits(); 1904 unsigned ToType; 1905 if (ScalarVT.isFloatingPoint()) 1906 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped 1907 : NVPTX::PTXLdStInstCode::Float; 1908 else 1909 ToType = NVPTX::PTXLdStInstCode::Unsigned; 1910 1911 SmallVector<SDValue, 12> StOps; 1912 SDValue N2; 1913 unsigned VecType; 1914 1915 switch (N->getOpcode()) { 1916 case NVPTXISD::StoreV2: 1917 VecType = NVPTX::PTXLdStInstCode::V2; 1918 StOps.push_back(N->getOperand(1)); 1919 StOps.push_back(N->getOperand(2)); 1920 N2 = N->getOperand(3); 1921 break; 1922 case NVPTXISD::StoreV4: 1923 VecType = NVPTX::PTXLdStInstCode::V4; 1924 StOps.push_back(N->getOperand(1)); 1925 StOps.push_back(N->getOperand(2)); 1926 StOps.push_back(N->getOperand(3)); 1927 StOps.push_back(N->getOperand(4)); 1928 N2 = N->getOperand(5); 1929 break; 1930 default: 1931 return false; 1932 } 1933 1934 // v8f16 is a special case. PTX doesn't have st.v8.f16 1935 // instruction. Instead, we split the vector into v2f16 chunks and 1936 // store them with st.v4.b32. 1937 if (EltVT == MVT::v2f16) { 1938 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode."); 1939 EltVT = MVT::i32; 1940 ToType = NVPTX::PTXLdStInstCode::Untyped; 1941 ToTypeWidth = 32; 1942 } 1943 1944 StOps.push_back(getI32Imm(IsVolatile, DL)); 1945 StOps.push_back(getI32Imm(CodeAddrSpace, DL)); 1946 StOps.push_back(getI32Imm(VecType, DL)); 1947 StOps.push_back(getI32Imm(ToType, DL)); 1948 StOps.push_back(getI32Imm(ToTypeWidth, DL)); 1949 1950 if (SelectDirectAddr(N2, Addr)) { 1951 switch (N->getOpcode()) { 1952 default: 1953 return false; 1954 case NVPTXISD::StoreV2: 1955 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1956 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar, 1957 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar, 1958 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar, 1959 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar); 1960 break; 1961 case NVPTXISD::StoreV4: 1962 Opcode = 1963 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar, 1964 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None, 1965 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, 1966 NVPTX::STV_f32_v4_avar, None); 1967 break; 1968 } 1969 StOps.push_back(Addr); 1970 } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) 1971 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 1972 switch (N->getOpcode()) { 1973 default: 1974 return false; 1975 case NVPTXISD::StoreV2: 1976 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 1977 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, 1978 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, 1979 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi, 1980 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); 1981 break; 1982 case NVPTXISD::StoreV4: 1983 Opcode = 1984 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, 1985 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None, 1986 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, 1987 NVPTX::STV_f32_v4_asi, None); 1988 break; 1989 } 1990 StOps.push_back(Base); 1991 StOps.push_back(Offset); 1992 } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset) 1993 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 1994 if (PointerSize == 64) { 1995 switch (N->getOpcode()) { 1996 default: 1997 return false; 1998 case NVPTXISD::StoreV2: 1999 Opcode = pickOpcodeForVT( 2000 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64, 2001 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64, 2002 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64, 2003 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64, 2004 NVPTX::STV_f64_v2_ari_64); 2005 break; 2006 case NVPTXISD::StoreV4: 2007 Opcode = pickOpcodeForVT( 2008 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, 2009 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None, 2010 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, 2011 NVPTX::STV_f32_v4_ari_64, None); 2012 break; 2013 } 2014 } else { 2015 switch (N->getOpcode()) { 2016 default: 2017 return false; 2018 case NVPTXISD::StoreV2: 2019 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, 2020 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, 2021 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, 2022 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari, 2023 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); 2024 break; 2025 case NVPTXISD::StoreV4: 2026 Opcode = 2027 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari, 2028 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None, 2029 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, 2030 NVPTX::STV_f32_v4_ari, None); 2031 break; 2032 } 2033 } 2034 StOps.push_back(Base); 2035 StOps.push_back(Offset); 2036 } else { 2037 if (PointerSize == 64) { 2038 switch (N->getOpcode()) { 2039 default: 2040 return false; 2041 case NVPTXISD::StoreV2: 2042 Opcode = pickOpcodeForVT( 2043 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64, 2044 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64, 2045 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64, 2046 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64, 2047 NVPTX::STV_f64_v2_areg_64); 2048 break; 2049 case NVPTXISD::StoreV4: 2050 Opcode = pickOpcodeForVT( 2051 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, 2052 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None, 2053 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, 2054 NVPTX::STV_f32_v4_areg_64, None); 2055 break; 2056 } 2057 } else { 2058 switch (N->getOpcode()) { 2059 default: 2060 return false; 2061 case NVPTXISD::StoreV2: 2062 Opcode = 2063 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg, 2064 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg, 2065 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg, 2066 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg, 2067 NVPTX::STV_f64_v2_areg); 2068 break; 2069 case NVPTXISD::StoreV4: 2070 Opcode = 2071 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, 2072 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None, 2073 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, 2074 NVPTX::STV_f32_v4_areg, None); 2075 break; 2076 } 2077 } 2078 StOps.push_back(N2); 2079 } 2080 2081 if (!Opcode) 2082 return false; 2083 2084 StOps.push_back(Chain); 2085 2086 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps); 2087 2088 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 2089 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef}); 2090 2091 ReplaceNode(N, ST); 2092 return true; 2093 } 2094 2095 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { 2096 SDValue Chain = Node->getOperand(0); 2097 SDValue Offset = Node->getOperand(2); 2098 SDValue Flag = Node->getOperand(3); 2099 SDLoc DL(Node); 2100 MemSDNode *Mem = cast<MemSDNode>(Node); 2101 2102 unsigned VecSize; 2103 switch (Node->getOpcode()) { 2104 default: 2105 return false; 2106 case NVPTXISD::LoadParam: 2107 VecSize = 1; 2108 break; 2109 case NVPTXISD::LoadParamV2: 2110 VecSize = 2; 2111 break; 2112 case NVPTXISD::LoadParamV4: 2113 VecSize = 4; 2114 break; 2115 } 2116 2117 EVT EltVT = Node->getValueType(0); 2118 EVT MemVT = Mem->getMemoryVT(); 2119 2120 Optional<unsigned> Opcode; 2121 2122 switch (VecSize) { 2123 default: 2124 return false; 2125 case 1: 2126 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, 2127 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16, 2128 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64, 2129 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2, 2130 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64); 2131 break; 2132 case 2: 2133 Opcode = 2134 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8, 2135 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32, 2136 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16, 2137 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32, 2138 NVPTX::LoadParamMemV2F64); 2139 break; 2140 case 4: 2141 Opcode = pickOpcodeForVT( 2142 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, 2143 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None, 2144 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, 2145 NVPTX::LoadParamMemV4F32, None); 2146 break; 2147 } 2148 if (!Opcode) 2149 return false; 2150 2151 SDVTList VTs; 2152 if (VecSize == 1) { 2153 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); 2154 } else if (VecSize == 2) { 2155 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); 2156 } else { 2157 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; 2158 VTs = CurDAG->getVTList(EVTs); 2159 } 2160 2161 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2162 2163 SmallVector<SDValue, 2> Ops; 2164 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2165 Ops.push_back(Chain); 2166 Ops.push_back(Flag); 2167 2168 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops)); 2169 return true; 2170 } 2171 2172 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { 2173 SDLoc DL(N); 2174 SDValue Chain = N->getOperand(0); 2175 SDValue Offset = N->getOperand(1); 2176 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2177 MemSDNode *Mem = cast<MemSDNode>(N); 2178 2179 // How many elements do we have? 2180 unsigned NumElts = 1; 2181 switch (N->getOpcode()) { 2182 default: 2183 return false; 2184 case NVPTXISD::StoreRetval: 2185 NumElts = 1; 2186 break; 2187 case NVPTXISD::StoreRetvalV2: 2188 NumElts = 2; 2189 break; 2190 case NVPTXISD::StoreRetvalV4: 2191 NumElts = 4; 2192 break; 2193 } 2194 2195 // Build vector of operands 2196 SmallVector<SDValue, 6> Ops; 2197 for (unsigned i = 0; i < NumElts; ++i) 2198 Ops.push_back(N->getOperand(i + 2)); 2199 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2200 Ops.push_back(Chain); 2201 2202 // Determine target opcode 2203 // If we have an i1, use an 8-bit store. The lowering code in 2204 // NVPTXISelLowering will have already emitted an upcast. 2205 Optional<unsigned> Opcode = 0; 2206 switch (NumElts) { 2207 default: 2208 return false; 2209 case 1: 2210 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2211 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16, 2212 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64, 2213 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2, 2214 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64); 2215 break; 2216 case 2: 2217 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2218 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16, 2219 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64, 2220 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2, 2221 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64); 2222 break; 2223 case 4: 2224 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2225 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, 2226 NVPTX::StoreRetvalV4I32, None, 2227 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2, 2228 NVPTX::StoreRetvalV4F32, None); 2229 break; 2230 } 2231 if (!Opcode) 2232 return false; 2233 2234 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops); 2235 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 2236 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef}); 2237 2238 ReplaceNode(N, Ret); 2239 return true; 2240 } 2241 2242 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { 2243 SDLoc DL(N); 2244 SDValue Chain = N->getOperand(0); 2245 SDValue Param = N->getOperand(1); 2246 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); 2247 SDValue Offset = N->getOperand(2); 2248 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2249 MemSDNode *Mem = cast<MemSDNode>(N); 2250 SDValue Flag = N->getOperand(N->getNumOperands() - 1); 2251 2252 // How many elements do we have? 2253 unsigned NumElts = 1; 2254 switch (N->getOpcode()) { 2255 default: 2256 return false; 2257 case NVPTXISD::StoreParamU32: 2258 case NVPTXISD::StoreParamS32: 2259 case NVPTXISD::StoreParam: 2260 NumElts = 1; 2261 break; 2262 case NVPTXISD::StoreParamV2: 2263 NumElts = 2; 2264 break; 2265 case NVPTXISD::StoreParamV4: 2266 NumElts = 4; 2267 break; 2268 } 2269 2270 // Build vector of operands 2271 SmallVector<SDValue, 8> Ops; 2272 for (unsigned i = 0; i < NumElts; ++i) 2273 Ops.push_back(N->getOperand(i + 3)); 2274 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); 2275 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2276 Ops.push_back(Chain); 2277 Ops.push_back(Flag); 2278 2279 // Determine target opcode 2280 // If we have an i1, use an 8-bit store. The lowering code in 2281 // NVPTXISelLowering will have already emitted an upcast. 2282 Optional<unsigned> Opcode = 0; 2283 switch (N->getOpcode()) { 2284 default: 2285 switch (NumElts) { 2286 default: 2287 return false; 2288 case 1: 2289 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2290 NVPTX::StoreParamI8, NVPTX::StoreParamI16, 2291 NVPTX::StoreParamI32, NVPTX::StoreParamI64, 2292 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2, 2293 NVPTX::StoreParamF32, NVPTX::StoreParamF64); 2294 break; 2295 case 2: 2296 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2297 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16, 2298 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64, 2299 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2, 2300 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64); 2301 break; 2302 case 4: 2303 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, 2304 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16, 2305 NVPTX::StoreParamV4I32, None, 2306 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2, 2307 NVPTX::StoreParamV4F32, None); 2308 break; 2309 } 2310 if (!Opcode) 2311 return false; 2312 break; 2313 // Special case: if we have a sign-extend/zero-extend node, insert the 2314 // conversion instruction first, and use that as the value operand to 2315 // the selected StoreParam node. 2316 case NVPTXISD::StoreParamU32: { 2317 Opcode = NVPTX::StoreParamI32; 2318 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 2319 MVT::i32); 2320 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, 2321 MVT::i32, Ops[0], CvtNone); 2322 Ops[0] = SDValue(Cvt, 0); 2323 break; 2324 } 2325 case NVPTXISD::StoreParamS32: { 2326 Opcode = NVPTX::StoreParamI32; 2327 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 2328 MVT::i32); 2329 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, 2330 MVT::i32, Ops[0], CvtNone); 2331 Ops[0] = SDValue(Cvt, 0); 2332 break; 2333 } 2334 } 2335 2336 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 2337 SDNode *Ret = 2338 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops); 2339 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); 2340 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef}); 2341 2342 ReplaceNode(N, Ret); 2343 return true; 2344 } 2345 2346 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) { 2347 unsigned Opc = 0; 2348 2349 switch (N->getOpcode()) { 2350 default: return false; 2351 case NVPTXISD::Tex1DFloatS32: 2352 Opc = NVPTX::TEX_1D_F32_S32; 2353 break; 2354 case NVPTXISD::Tex1DFloatFloat: 2355 Opc = NVPTX::TEX_1D_F32_F32; 2356 break; 2357 case NVPTXISD::Tex1DFloatFloatLevel: 2358 Opc = NVPTX::TEX_1D_F32_F32_LEVEL; 2359 break; 2360 case NVPTXISD::Tex1DFloatFloatGrad: 2361 Opc = NVPTX::TEX_1D_F32_F32_GRAD; 2362 break; 2363 case NVPTXISD::Tex1DS32S32: 2364 Opc = NVPTX::TEX_1D_S32_S32; 2365 break; 2366 case NVPTXISD::Tex1DS32Float: 2367 Opc = NVPTX::TEX_1D_S32_F32; 2368 break; 2369 case NVPTXISD::Tex1DS32FloatLevel: 2370 Opc = NVPTX::TEX_1D_S32_F32_LEVEL; 2371 break; 2372 case NVPTXISD::Tex1DS32FloatGrad: 2373 Opc = NVPTX::TEX_1D_S32_F32_GRAD; 2374 break; 2375 case NVPTXISD::Tex1DU32S32: 2376 Opc = NVPTX::TEX_1D_U32_S32; 2377 break; 2378 case NVPTXISD::Tex1DU32Float: 2379 Opc = NVPTX::TEX_1D_U32_F32; 2380 break; 2381 case NVPTXISD::Tex1DU32FloatLevel: 2382 Opc = NVPTX::TEX_1D_U32_F32_LEVEL; 2383 break; 2384 case NVPTXISD::Tex1DU32FloatGrad: 2385 Opc = NVPTX::TEX_1D_U32_F32_GRAD; 2386 break; 2387 case NVPTXISD::Tex1DArrayFloatS32: 2388 Opc = NVPTX::TEX_1D_ARRAY_F32_S32; 2389 break; 2390 case NVPTXISD::Tex1DArrayFloatFloat: 2391 Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 2392 break; 2393 case NVPTXISD::Tex1DArrayFloatFloatLevel: 2394 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; 2395 break; 2396 case NVPTXISD::Tex1DArrayFloatFloatGrad: 2397 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; 2398 break; 2399 case NVPTXISD::Tex1DArrayS32S32: 2400 Opc = NVPTX::TEX_1D_ARRAY_S32_S32; 2401 break; 2402 case NVPTXISD::Tex1DArrayS32Float: 2403 Opc = NVPTX::TEX_1D_ARRAY_S32_F32; 2404 break; 2405 case NVPTXISD::Tex1DArrayS32FloatLevel: 2406 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; 2407 break; 2408 case NVPTXISD::Tex1DArrayS32FloatGrad: 2409 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; 2410 break; 2411 case NVPTXISD::Tex1DArrayU32S32: 2412 Opc = NVPTX::TEX_1D_ARRAY_U32_S32; 2413 break; 2414 case NVPTXISD::Tex1DArrayU32Float: 2415 Opc = NVPTX::TEX_1D_ARRAY_U32_F32; 2416 break; 2417 case NVPTXISD::Tex1DArrayU32FloatLevel: 2418 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; 2419 break; 2420 case NVPTXISD::Tex1DArrayU32FloatGrad: 2421 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; 2422 break; 2423 case NVPTXISD::Tex2DFloatS32: 2424 Opc = NVPTX::TEX_2D_F32_S32; 2425 break; 2426 case NVPTXISD::Tex2DFloatFloat: 2427 Opc = NVPTX::TEX_2D_F32_F32; 2428 break; 2429 case NVPTXISD::Tex2DFloatFloatLevel: 2430 Opc = NVPTX::TEX_2D_F32_F32_LEVEL; 2431 break; 2432 case NVPTXISD::Tex2DFloatFloatGrad: 2433 Opc = NVPTX::TEX_2D_F32_F32_GRAD; 2434 break; 2435 case NVPTXISD::Tex2DS32S32: 2436 Opc = NVPTX::TEX_2D_S32_S32; 2437 break; 2438 case NVPTXISD::Tex2DS32Float: 2439 Opc = NVPTX::TEX_2D_S32_F32; 2440 break; 2441 case NVPTXISD::Tex2DS32FloatLevel: 2442 Opc = NVPTX::TEX_2D_S32_F32_LEVEL; 2443 break; 2444 case NVPTXISD::Tex2DS32FloatGrad: 2445 Opc = NVPTX::TEX_2D_S32_F32_GRAD; 2446 break; 2447 case NVPTXISD::Tex2DU32S32: 2448 Opc = NVPTX::TEX_2D_U32_S32; 2449 break; 2450 case NVPTXISD::Tex2DU32Float: 2451 Opc = NVPTX::TEX_2D_U32_F32; 2452 break; 2453 case NVPTXISD::Tex2DU32FloatLevel: 2454 Opc = NVPTX::TEX_2D_U32_F32_LEVEL; 2455 break; 2456 case NVPTXISD::Tex2DU32FloatGrad: 2457 Opc = NVPTX::TEX_2D_U32_F32_GRAD; 2458 break; 2459 case NVPTXISD::Tex2DArrayFloatS32: 2460 Opc = NVPTX::TEX_2D_ARRAY_F32_S32; 2461 break; 2462 case NVPTXISD::Tex2DArrayFloatFloat: 2463 Opc = NVPTX::TEX_2D_ARRAY_F32_F32; 2464 break; 2465 case NVPTXISD::Tex2DArrayFloatFloatLevel: 2466 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; 2467 break; 2468 case NVPTXISD::Tex2DArrayFloatFloatGrad: 2469 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; 2470 break; 2471 case NVPTXISD::Tex2DArrayS32S32: 2472 Opc = NVPTX::TEX_2D_ARRAY_S32_S32; 2473 break; 2474 case NVPTXISD::Tex2DArrayS32Float: 2475 Opc = NVPTX::TEX_2D_ARRAY_S32_F32; 2476 break; 2477 case NVPTXISD::Tex2DArrayS32FloatLevel: 2478 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; 2479 break; 2480 case NVPTXISD::Tex2DArrayS32FloatGrad: 2481 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; 2482 break; 2483 case NVPTXISD::Tex2DArrayU32S32: 2484 Opc = NVPTX::TEX_2D_ARRAY_U32_S32; 2485 break; 2486 case NVPTXISD::Tex2DArrayU32Float: 2487 Opc = NVPTX::TEX_2D_ARRAY_U32_F32; 2488 break; 2489 case NVPTXISD::Tex2DArrayU32FloatLevel: 2490 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; 2491 break; 2492 case NVPTXISD::Tex2DArrayU32FloatGrad: 2493 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; 2494 break; 2495 case NVPTXISD::Tex3DFloatS32: 2496 Opc = NVPTX::TEX_3D_F32_S32; 2497 break; 2498 case NVPTXISD::Tex3DFloatFloat: 2499 Opc = NVPTX::TEX_3D_F32_F32; 2500 break; 2501 case NVPTXISD::Tex3DFloatFloatLevel: 2502 Opc = NVPTX::TEX_3D_F32_F32_LEVEL; 2503 break; 2504 case NVPTXISD::Tex3DFloatFloatGrad: 2505 Opc = NVPTX::TEX_3D_F32_F32_GRAD; 2506 break; 2507 case NVPTXISD::Tex3DS32S32: 2508 Opc = NVPTX::TEX_3D_S32_S32; 2509 break; 2510 case NVPTXISD::Tex3DS32Float: 2511 Opc = NVPTX::TEX_3D_S32_F32; 2512 break; 2513 case NVPTXISD::Tex3DS32FloatLevel: 2514 Opc = NVPTX::TEX_3D_S32_F32_LEVEL; 2515 break; 2516 case NVPTXISD::Tex3DS32FloatGrad: 2517 Opc = NVPTX::TEX_3D_S32_F32_GRAD; 2518 break; 2519 case NVPTXISD::Tex3DU32S32: 2520 Opc = NVPTX::TEX_3D_U32_S32; 2521 break; 2522 case NVPTXISD::Tex3DU32Float: 2523 Opc = NVPTX::TEX_3D_U32_F32; 2524 break; 2525 case NVPTXISD::Tex3DU32FloatLevel: 2526 Opc = NVPTX::TEX_3D_U32_F32_LEVEL; 2527 break; 2528 case NVPTXISD::Tex3DU32FloatGrad: 2529 Opc = NVPTX::TEX_3D_U32_F32_GRAD; 2530 break; 2531 case NVPTXISD::TexCubeFloatFloat: 2532 Opc = NVPTX::TEX_CUBE_F32_F32; 2533 break; 2534 case NVPTXISD::TexCubeFloatFloatLevel: 2535 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; 2536 break; 2537 case NVPTXISD::TexCubeS32Float: 2538 Opc = NVPTX::TEX_CUBE_S32_F32; 2539 break; 2540 case NVPTXISD::TexCubeS32FloatLevel: 2541 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; 2542 break; 2543 case NVPTXISD::TexCubeU32Float: 2544 Opc = NVPTX::TEX_CUBE_U32_F32; 2545 break; 2546 case NVPTXISD::TexCubeU32FloatLevel: 2547 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 2548 break; 2549 case NVPTXISD::TexCubeArrayFloatFloat: 2550 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; 2551 break; 2552 case NVPTXISD::TexCubeArrayFloatFloatLevel: 2553 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; 2554 break; 2555 case NVPTXISD::TexCubeArrayS32Float: 2556 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; 2557 break; 2558 case NVPTXISD::TexCubeArrayS32FloatLevel: 2559 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; 2560 break; 2561 case NVPTXISD::TexCubeArrayU32Float: 2562 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; 2563 break; 2564 case NVPTXISD::TexCubeArrayU32FloatLevel: 2565 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; 2566 break; 2567 case NVPTXISD::Tld4R2DFloatFloat: 2568 Opc = NVPTX::TLD4_R_2D_F32_F32; 2569 break; 2570 case NVPTXISD::Tld4G2DFloatFloat: 2571 Opc = NVPTX::TLD4_G_2D_F32_F32; 2572 break; 2573 case NVPTXISD::Tld4B2DFloatFloat: 2574 Opc = NVPTX::TLD4_B_2D_F32_F32; 2575 break; 2576 case NVPTXISD::Tld4A2DFloatFloat: 2577 Opc = NVPTX::TLD4_A_2D_F32_F32; 2578 break; 2579 case NVPTXISD::Tld4R2DS64Float: 2580 Opc = NVPTX::TLD4_R_2D_S32_F32; 2581 break; 2582 case NVPTXISD::Tld4G2DS64Float: 2583 Opc = NVPTX::TLD4_G_2D_S32_F32; 2584 break; 2585 case NVPTXISD::Tld4B2DS64Float: 2586 Opc = NVPTX::TLD4_B_2D_S32_F32; 2587 break; 2588 case NVPTXISD::Tld4A2DS64Float: 2589 Opc = NVPTX::TLD4_A_2D_S32_F32; 2590 break; 2591 case NVPTXISD::Tld4R2DU64Float: 2592 Opc = NVPTX::TLD4_R_2D_U32_F32; 2593 break; 2594 case NVPTXISD::Tld4G2DU64Float: 2595 Opc = NVPTX::TLD4_G_2D_U32_F32; 2596 break; 2597 case NVPTXISD::Tld4B2DU64Float: 2598 Opc = NVPTX::TLD4_B_2D_U32_F32; 2599 break; 2600 case NVPTXISD::Tld4A2DU64Float: 2601 Opc = NVPTX::TLD4_A_2D_U32_F32; 2602 break; 2603 case NVPTXISD::TexUnified1DFloatS32: 2604 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; 2605 break; 2606 case NVPTXISD::TexUnified1DFloatFloat: 2607 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; 2608 break; 2609 case NVPTXISD::TexUnified1DFloatFloatLevel: 2610 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; 2611 break; 2612 case NVPTXISD::TexUnified1DFloatFloatGrad: 2613 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; 2614 break; 2615 case NVPTXISD::TexUnified1DS32S32: 2616 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; 2617 break; 2618 case NVPTXISD::TexUnified1DS32Float: 2619 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; 2620 break; 2621 case NVPTXISD::TexUnified1DS32FloatLevel: 2622 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; 2623 break; 2624 case NVPTXISD::TexUnified1DS32FloatGrad: 2625 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; 2626 break; 2627 case NVPTXISD::TexUnified1DU32S32: 2628 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; 2629 break; 2630 case NVPTXISD::TexUnified1DU32Float: 2631 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; 2632 break; 2633 case NVPTXISD::TexUnified1DU32FloatLevel: 2634 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; 2635 break; 2636 case NVPTXISD::TexUnified1DU32FloatGrad: 2637 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; 2638 break; 2639 case NVPTXISD::TexUnified1DArrayFloatS32: 2640 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; 2641 break; 2642 case NVPTXISD::TexUnified1DArrayFloatFloat: 2643 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; 2644 break; 2645 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 2646 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; 2647 break; 2648 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 2649 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; 2650 break; 2651 case NVPTXISD::TexUnified1DArrayS32S32: 2652 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; 2653 break; 2654 case NVPTXISD::TexUnified1DArrayS32Float: 2655 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; 2656 break; 2657 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 2658 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; 2659 break; 2660 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 2661 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; 2662 break; 2663 case NVPTXISD::TexUnified1DArrayU32S32: 2664 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; 2665 break; 2666 case NVPTXISD::TexUnified1DArrayU32Float: 2667 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; 2668 break; 2669 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 2670 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; 2671 break; 2672 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 2673 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; 2674 break; 2675 case NVPTXISD::TexUnified2DFloatS32: 2676 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; 2677 break; 2678 case NVPTXISD::TexUnified2DFloatFloat: 2679 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; 2680 break; 2681 case NVPTXISD::TexUnified2DFloatFloatLevel: 2682 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; 2683 break; 2684 case NVPTXISD::TexUnified2DFloatFloatGrad: 2685 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; 2686 break; 2687 case NVPTXISD::TexUnified2DS32S32: 2688 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; 2689 break; 2690 case NVPTXISD::TexUnified2DS32Float: 2691 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; 2692 break; 2693 case NVPTXISD::TexUnified2DS32FloatLevel: 2694 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; 2695 break; 2696 case NVPTXISD::TexUnified2DS32FloatGrad: 2697 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; 2698 break; 2699 case NVPTXISD::TexUnified2DU32S32: 2700 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; 2701 break; 2702 case NVPTXISD::TexUnified2DU32Float: 2703 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; 2704 break; 2705 case NVPTXISD::TexUnified2DU32FloatLevel: 2706 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; 2707 break; 2708 case NVPTXISD::TexUnified2DU32FloatGrad: 2709 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; 2710 break; 2711 case NVPTXISD::TexUnified2DArrayFloatS32: 2712 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; 2713 break; 2714 case NVPTXISD::TexUnified2DArrayFloatFloat: 2715 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; 2716 break; 2717 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 2718 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; 2719 break; 2720 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 2721 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; 2722 break; 2723 case NVPTXISD::TexUnified2DArrayS32S32: 2724 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; 2725 break; 2726 case NVPTXISD::TexUnified2DArrayS32Float: 2727 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; 2728 break; 2729 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 2730 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; 2731 break; 2732 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 2733 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; 2734 break; 2735 case NVPTXISD::TexUnified2DArrayU32S32: 2736 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; 2737 break; 2738 case NVPTXISD::TexUnified2DArrayU32Float: 2739 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; 2740 break; 2741 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 2742 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; 2743 break; 2744 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 2745 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; 2746 break; 2747 case NVPTXISD::TexUnified3DFloatS32: 2748 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; 2749 break; 2750 case NVPTXISD::TexUnified3DFloatFloat: 2751 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; 2752 break; 2753 case NVPTXISD::TexUnified3DFloatFloatLevel: 2754 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; 2755 break; 2756 case NVPTXISD::TexUnified3DFloatFloatGrad: 2757 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; 2758 break; 2759 case NVPTXISD::TexUnified3DS32S32: 2760 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; 2761 break; 2762 case NVPTXISD::TexUnified3DS32Float: 2763 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; 2764 break; 2765 case NVPTXISD::TexUnified3DS32FloatLevel: 2766 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; 2767 break; 2768 case NVPTXISD::TexUnified3DS32FloatGrad: 2769 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; 2770 break; 2771 case NVPTXISD::TexUnified3DU32S32: 2772 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; 2773 break; 2774 case NVPTXISD::TexUnified3DU32Float: 2775 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; 2776 break; 2777 case NVPTXISD::TexUnified3DU32FloatLevel: 2778 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; 2779 break; 2780 case NVPTXISD::TexUnified3DU32FloatGrad: 2781 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; 2782 break; 2783 case NVPTXISD::TexUnifiedCubeFloatFloat: 2784 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; 2785 break; 2786 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 2787 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; 2788 break; 2789 case NVPTXISD::TexUnifiedCubeS32Float: 2790 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; 2791 break; 2792 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 2793 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; 2794 break; 2795 case NVPTXISD::TexUnifiedCubeU32Float: 2796 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; 2797 break; 2798 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 2799 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; 2800 break; 2801 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 2802 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; 2803 break; 2804 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 2805 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; 2806 break; 2807 case NVPTXISD::TexUnifiedCubeArrayS32Float: 2808 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; 2809 break; 2810 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 2811 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; 2812 break; 2813 case NVPTXISD::TexUnifiedCubeArrayU32Float: 2814 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; 2815 break; 2816 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 2817 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; 2818 break; 2819 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 2820 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; 2821 break; 2822 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 2823 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; 2824 break; 2825 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 2826 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; 2827 break; 2828 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 2829 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; 2830 break; 2831 case NVPTXISD::Tld4UnifiedR2DS64Float: 2832 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; 2833 break; 2834 case NVPTXISD::Tld4UnifiedG2DS64Float: 2835 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; 2836 break; 2837 case NVPTXISD::Tld4UnifiedB2DS64Float: 2838 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; 2839 break; 2840 case NVPTXISD::Tld4UnifiedA2DS64Float: 2841 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; 2842 break; 2843 case NVPTXISD::Tld4UnifiedR2DU64Float: 2844 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; 2845 break; 2846 case NVPTXISD::Tld4UnifiedG2DU64Float: 2847 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; 2848 break; 2849 case NVPTXISD::Tld4UnifiedB2DU64Float: 2850 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; 2851 break; 2852 case NVPTXISD::Tld4UnifiedA2DU64Float: 2853 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; 2854 break; 2855 } 2856 2857 // Copy over operands 2858 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end()); 2859 Ops.push_back(N->getOperand(0)); // Move chain to the back. 2860 2861 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); 2862 return true; 2863 } 2864 2865 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { 2866 unsigned Opc = 0; 2867 switch (N->getOpcode()) { 2868 default: return false; 2869 case NVPTXISD::Suld1DI8Clamp: 2870 Opc = NVPTX::SULD_1D_I8_CLAMP; 2871 break; 2872 case NVPTXISD::Suld1DI16Clamp: 2873 Opc = NVPTX::SULD_1D_I16_CLAMP; 2874 break; 2875 case NVPTXISD::Suld1DI32Clamp: 2876 Opc = NVPTX::SULD_1D_I32_CLAMP; 2877 break; 2878 case NVPTXISD::Suld1DI64Clamp: 2879 Opc = NVPTX::SULD_1D_I64_CLAMP; 2880 break; 2881 case NVPTXISD::Suld1DV2I8Clamp: 2882 Opc = NVPTX::SULD_1D_V2I8_CLAMP; 2883 break; 2884 case NVPTXISD::Suld1DV2I16Clamp: 2885 Opc = NVPTX::SULD_1D_V2I16_CLAMP; 2886 break; 2887 case NVPTXISD::Suld1DV2I32Clamp: 2888 Opc = NVPTX::SULD_1D_V2I32_CLAMP; 2889 break; 2890 case NVPTXISD::Suld1DV2I64Clamp: 2891 Opc = NVPTX::SULD_1D_V2I64_CLAMP; 2892 break; 2893 case NVPTXISD::Suld1DV4I8Clamp: 2894 Opc = NVPTX::SULD_1D_V4I8_CLAMP; 2895 break; 2896 case NVPTXISD::Suld1DV4I16Clamp: 2897 Opc = NVPTX::SULD_1D_V4I16_CLAMP; 2898 break; 2899 case NVPTXISD::Suld1DV4I32Clamp: 2900 Opc = NVPTX::SULD_1D_V4I32_CLAMP; 2901 break; 2902 case NVPTXISD::Suld1DArrayI8Clamp: 2903 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; 2904 break; 2905 case NVPTXISD::Suld1DArrayI16Clamp: 2906 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; 2907 break; 2908 case NVPTXISD::Suld1DArrayI32Clamp: 2909 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; 2910 break; 2911 case NVPTXISD::Suld1DArrayI64Clamp: 2912 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; 2913 break; 2914 case NVPTXISD::Suld1DArrayV2I8Clamp: 2915 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; 2916 break; 2917 case NVPTXISD::Suld1DArrayV2I16Clamp: 2918 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; 2919 break; 2920 case NVPTXISD::Suld1DArrayV2I32Clamp: 2921 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; 2922 break; 2923 case NVPTXISD::Suld1DArrayV2I64Clamp: 2924 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; 2925 break; 2926 case NVPTXISD::Suld1DArrayV4I8Clamp: 2927 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; 2928 break; 2929 case NVPTXISD::Suld1DArrayV4I16Clamp: 2930 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; 2931 break; 2932 case NVPTXISD::Suld1DArrayV4I32Clamp: 2933 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; 2934 break; 2935 case NVPTXISD::Suld2DI8Clamp: 2936 Opc = NVPTX::SULD_2D_I8_CLAMP; 2937 break; 2938 case NVPTXISD::Suld2DI16Clamp: 2939 Opc = NVPTX::SULD_2D_I16_CLAMP; 2940 break; 2941 case NVPTXISD::Suld2DI32Clamp: 2942 Opc = NVPTX::SULD_2D_I32_CLAMP; 2943 break; 2944 case NVPTXISD::Suld2DI64Clamp: 2945 Opc = NVPTX::SULD_2D_I64_CLAMP; 2946 break; 2947 case NVPTXISD::Suld2DV2I8Clamp: 2948 Opc = NVPTX::SULD_2D_V2I8_CLAMP; 2949 break; 2950 case NVPTXISD::Suld2DV2I16Clamp: 2951 Opc = NVPTX::SULD_2D_V2I16_CLAMP; 2952 break; 2953 case NVPTXISD::Suld2DV2I32Clamp: 2954 Opc = NVPTX::SULD_2D_V2I32_CLAMP; 2955 break; 2956 case NVPTXISD::Suld2DV2I64Clamp: 2957 Opc = NVPTX::SULD_2D_V2I64_CLAMP; 2958 break; 2959 case NVPTXISD::Suld2DV4I8Clamp: 2960 Opc = NVPTX::SULD_2D_V4I8_CLAMP; 2961 break; 2962 case NVPTXISD::Suld2DV4I16Clamp: 2963 Opc = NVPTX::SULD_2D_V4I16_CLAMP; 2964 break; 2965 case NVPTXISD::Suld2DV4I32Clamp: 2966 Opc = NVPTX::SULD_2D_V4I32_CLAMP; 2967 break; 2968 case NVPTXISD::Suld2DArrayI8Clamp: 2969 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; 2970 break; 2971 case NVPTXISD::Suld2DArrayI16Clamp: 2972 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; 2973 break; 2974 case NVPTXISD::Suld2DArrayI32Clamp: 2975 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; 2976 break; 2977 case NVPTXISD::Suld2DArrayI64Clamp: 2978 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; 2979 break; 2980 case NVPTXISD::Suld2DArrayV2I8Clamp: 2981 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; 2982 break; 2983 case NVPTXISD::Suld2DArrayV2I16Clamp: 2984 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; 2985 break; 2986 case NVPTXISD::Suld2DArrayV2I32Clamp: 2987 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; 2988 break; 2989 case NVPTXISD::Suld2DArrayV2I64Clamp: 2990 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; 2991 break; 2992 case NVPTXISD::Suld2DArrayV4I8Clamp: 2993 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; 2994 break; 2995 case NVPTXISD::Suld2DArrayV4I16Clamp: 2996 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; 2997 break; 2998 case NVPTXISD::Suld2DArrayV4I32Clamp: 2999 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; 3000 break; 3001 case NVPTXISD::Suld3DI8Clamp: 3002 Opc = NVPTX::SULD_3D_I8_CLAMP; 3003 break; 3004 case NVPTXISD::Suld3DI16Clamp: 3005 Opc = NVPTX::SULD_3D_I16_CLAMP; 3006 break; 3007 case NVPTXISD::Suld3DI32Clamp: 3008 Opc = NVPTX::SULD_3D_I32_CLAMP; 3009 break; 3010 case NVPTXISD::Suld3DI64Clamp: 3011 Opc = NVPTX::SULD_3D_I64_CLAMP; 3012 break; 3013 case NVPTXISD::Suld3DV2I8Clamp: 3014 Opc = NVPTX::SULD_3D_V2I8_CLAMP; 3015 break; 3016 case NVPTXISD::Suld3DV2I16Clamp: 3017 Opc = NVPTX::SULD_3D_V2I16_CLAMP; 3018 break; 3019 case NVPTXISD::Suld3DV2I32Clamp: 3020 Opc = NVPTX::SULD_3D_V2I32_CLAMP; 3021 break; 3022 case NVPTXISD::Suld3DV2I64Clamp: 3023 Opc = NVPTX::SULD_3D_V2I64_CLAMP; 3024 break; 3025 case NVPTXISD::Suld3DV4I8Clamp: 3026 Opc = NVPTX::SULD_3D_V4I8_CLAMP; 3027 break; 3028 case NVPTXISD::Suld3DV4I16Clamp: 3029 Opc = NVPTX::SULD_3D_V4I16_CLAMP; 3030 break; 3031 case NVPTXISD::Suld3DV4I32Clamp: 3032 Opc = NVPTX::SULD_3D_V4I32_CLAMP; 3033 break; 3034 case NVPTXISD::Suld1DI8Trap: 3035 Opc = NVPTX::SULD_1D_I8_TRAP; 3036 break; 3037 case NVPTXISD::Suld1DI16Trap: 3038 Opc = NVPTX::SULD_1D_I16_TRAP; 3039 break; 3040 case NVPTXISD::Suld1DI32Trap: 3041 Opc = NVPTX::SULD_1D_I32_TRAP; 3042 break; 3043 case NVPTXISD::Suld1DI64Trap: 3044 Opc = NVPTX::SULD_1D_I64_TRAP; 3045 break; 3046 case NVPTXISD::Suld1DV2I8Trap: 3047 Opc = NVPTX::SULD_1D_V2I8_TRAP; 3048 break; 3049 case NVPTXISD::Suld1DV2I16Trap: 3050 Opc = NVPTX::SULD_1D_V2I16_TRAP; 3051 break; 3052 case NVPTXISD::Suld1DV2I32Trap: 3053 Opc = NVPTX::SULD_1D_V2I32_TRAP; 3054 break; 3055 case NVPTXISD::Suld1DV2I64Trap: 3056 Opc = NVPTX::SULD_1D_V2I64_TRAP; 3057 break; 3058 case NVPTXISD::Suld1DV4I8Trap: 3059 Opc = NVPTX::SULD_1D_V4I8_TRAP; 3060 break; 3061 case NVPTXISD::Suld1DV4I16Trap: 3062 Opc = NVPTX::SULD_1D_V4I16_TRAP; 3063 break; 3064 case NVPTXISD::Suld1DV4I32Trap: 3065 Opc = NVPTX::SULD_1D_V4I32_TRAP; 3066 break; 3067 case NVPTXISD::Suld1DArrayI8Trap: 3068 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; 3069 break; 3070 case NVPTXISD::Suld1DArrayI16Trap: 3071 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; 3072 break; 3073 case NVPTXISD::Suld1DArrayI32Trap: 3074 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; 3075 break; 3076 case NVPTXISD::Suld1DArrayI64Trap: 3077 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; 3078 break; 3079 case NVPTXISD::Suld1DArrayV2I8Trap: 3080 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; 3081 break; 3082 case NVPTXISD::Suld1DArrayV2I16Trap: 3083 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; 3084 break; 3085 case NVPTXISD::Suld1DArrayV2I32Trap: 3086 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; 3087 break; 3088 case NVPTXISD::Suld1DArrayV2I64Trap: 3089 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; 3090 break; 3091 case NVPTXISD::Suld1DArrayV4I8Trap: 3092 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; 3093 break; 3094 case NVPTXISD::Suld1DArrayV4I16Trap: 3095 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; 3096 break; 3097 case NVPTXISD::Suld1DArrayV4I32Trap: 3098 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; 3099 break; 3100 case NVPTXISD::Suld2DI8Trap: 3101 Opc = NVPTX::SULD_2D_I8_TRAP; 3102 break; 3103 case NVPTXISD::Suld2DI16Trap: 3104 Opc = NVPTX::SULD_2D_I16_TRAP; 3105 break; 3106 case NVPTXISD::Suld2DI32Trap: 3107 Opc = NVPTX::SULD_2D_I32_TRAP; 3108 break; 3109 case NVPTXISD::Suld2DI64Trap: 3110 Opc = NVPTX::SULD_2D_I64_TRAP; 3111 break; 3112 case NVPTXISD::Suld2DV2I8Trap: 3113 Opc = NVPTX::SULD_2D_V2I8_TRAP; 3114 break; 3115 case NVPTXISD::Suld2DV2I16Trap: 3116 Opc = NVPTX::SULD_2D_V2I16_TRAP; 3117 break; 3118 case NVPTXISD::Suld2DV2I32Trap: 3119 Opc = NVPTX::SULD_2D_V2I32_TRAP; 3120 break; 3121 case NVPTXISD::Suld2DV2I64Trap: 3122 Opc = NVPTX::SULD_2D_V2I64_TRAP; 3123 break; 3124 case NVPTXISD::Suld2DV4I8Trap: 3125 Opc = NVPTX::SULD_2D_V4I8_TRAP; 3126 break; 3127 case NVPTXISD::Suld2DV4I16Trap: 3128 Opc = NVPTX::SULD_2D_V4I16_TRAP; 3129 break; 3130 case NVPTXISD::Suld2DV4I32Trap: 3131 Opc = NVPTX::SULD_2D_V4I32_TRAP; 3132 break; 3133 case NVPTXISD::Suld2DArrayI8Trap: 3134 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; 3135 break; 3136 case NVPTXISD::Suld2DArrayI16Trap: 3137 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; 3138 break; 3139 case NVPTXISD::Suld2DArrayI32Trap: 3140 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; 3141 break; 3142 case NVPTXISD::Suld2DArrayI64Trap: 3143 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; 3144 break; 3145 case NVPTXISD::Suld2DArrayV2I8Trap: 3146 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; 3147 break; 3148 case NVPTXISD::Suld2DArrayV2I16Trap: 3149 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; 3150 break; 3151 case NVPTXISD::Suld2DArrayV2I32Trap: 3152 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; 3153 break; 3154 case NVPTXISD::Suld2DArrayV2I64Trap: 3155 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; 3156 break; 3157 case NVPTXISD::Suld2DArrayV4I8Trap: 3158 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; 3159 break; 3160 case NVPTXISD::Suld2DArrayV4I16Trap: 3161 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; 3162 break; 3163 case NVPTXISD::Suld2DArrayV4I32Trap: 3164 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; 3165 break; 3166 case NVPTXISD::Suld3DI8Trap: 3167 Opc = NVPTX::SULD_3D_I8_TRAP; 3168 break; 3169 case NVPTXISD::Suld3DI16Trap: 3170 Opc = NVPTX::SULD_3D_I16_TRAP; 3171 break; 3172 case NVPTXISD::Suld3DI32Trap: 3173 Opc = NVPTX::SULD_3D_I32_TRAP; 3174 break; 3175 case NVPTXISD::Suld3DI64Trap: 3176 Opc = NVPTX::SULD_3D_I64_TRAP; 3177 break; 3178 case NVPTXISD::Suld3DV2I8Trap: 3179 Opc = NVPTX::SULD_3D_V2I8_TRAP; 3180 break; 3181 case NVPTXISD::Suld3DV2I16Trap: 3182 Opc = NVPTX::SULD_3D_V2I16_TRAP; 3183 break; 3184 case NVPTXISD::Suld3DV2I32Trap: 3185 Opc = NVPTX::SULD_3D_V2I32_TRAP; 3186 break; 3187 case NVPTXISD::Suld3DV2I64Trap: 3188 Opc = NVPTX::SULD_3D_V2I64_TRAP; 3189 break; 3190 case NVPTXISD::Suld3DV4I8Trap: 3191 Opc = NVPTX::SULD_3D_V4I8_TRAP; 3192 break; 3193 case NVPTXISD::Suld3DV4I16Trap: 3194 Opc = NVPTX::SULD_3D_V4I16_TRAP; 3195 break; 3196 case NVPTXISD::Suld3DV4I32Trap: 3197 Opc = NVPTX::SULD_3D_V4I32_TRAP; 3198 break; 3199 case NVPTXISD::Suld1DI8Zero: 3200 Opc = NVPTX::SULD_1D_I8_ZERO; 3201 break; 3202 case NVPTXISD::Suld1DI16Zero: 3203 Opc = NVPTX::SULD_1D_I16_ZERO; 3204 break; 3205 case NVPTXISD::Suld1DI32Zero: 3206 Opc = NVPTX::SULD_1D_I32_ZERO; 3207 break; 3208 case NVPTXISD::Suld1DI64Zero: 3209 Opc = NVPTX::SULD_1D_I64_ZERO; 3210 break; 3211 case NVPTXISD::Suld1DV2I8Zero: 3212 Opc = NVPTX::SULD_1D_V2I8_ZERO; 3213 break; 3214 case NVPTXISD::Suld1DV2I16Zero: 3215 Opc = NVPTX::SULD_1D_V2I16_ZERO; 3216 break; 3217 case NVPTXISD::Suld1DV2I32Zero: 3218 Opc = NVPTX::SULD_1D_V2I32_ZERO; 3219 break; 3220 case NVPTXISD::Suld1DV2I64Zero: 3221 Opc = NVPTX::SULD_1D_V2I64_ZERO; 3222 break; 3223 case NVPTXISD::Suld1DV4I8Zero: 3224 Opc = NVPTX::SULD_1D_V4I8_ZERO; 3225 break; 3226 case NVPTXISD::Suld1DV4I16Zero: 3227 Opc = NVPTX::SULD_1D_V4I16_ZERO; 3228 break; 3229 case NVPTXISD::Suld1DV4I32Zero: 3230 Opc = NVPTX::SULD_1D_V4I32_ZERO; 3231 break; 3232 case NVPTXISD::Suld1DArrayI8Zero: 3233 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; 3234 break; 3235 case NVPTXISD::Suld1DArrayI16Zero: 3236 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; 3237 break; 3238 case NVPTXISD::Suld1DArrayI32Zero: 3239 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; 3240 break; 3241 case NVPTXISD::Suld1DArrayI64Zero: 3242 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; 3243 break; 3244 case NVPTXISD::Suld1DArrayV2I8Zero: 3245 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; 3246 break; 3247 case NVPTXISD::Suld1DArrayV2I16Zero: 3248 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; 3249 break; 3250 case NVPTXISD::Suld1DArrayV2I32Zero: 3251 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; 3252 break; 3253 case NVPTXISD::Suld1DArrayV2I64Zero: 3254 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; 3255 break; 3256 case NVPTXISD::Suld1DArrayV4I8Zero: 3257 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; 3258 break; 3259 case NVPTXISD::Suld1DArrayV4I16Zero: 3260 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; 3261 break; 3262 case NVPTXISD::Suld1DArrayV4I32Zero: 3263 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; 3264 break; 3265 case NVPTXISD::Suld2DI8Zero: 3266 Opc = NVPTX::SULD_2D_I8_ZERO; 3267 break; 3268 case NVPTXISD::Suld2DI16Zero: 3269 Opc = NVPTX::SULD_2D_I16_ZERO; 3270 break; 3271 case NVPTXISD::Suld2DI32Zero: 3272 Opc = NVPTX::SULD_2D_I32_ZERO; 3273 break; 3274 case NVPTXISD::Suld2DI64Zero: 3275 Opc = NVPTX::SULD_2D_I64_ZERO; 3276 break; 3277 case NVPTXISD::Suld2DV2I8Zero: 3278 Opc = NVPTX::SULD_2D_V2I8_ZERO; 3279 break; 3280 case NVPTXISD::Suld2DV2I16Zero: 3281 Opc = NVPTX::SULD_2D_V2I16_ZERO; 3282 break; 3283 case NVPTXISD::Suld2DV2I32Zero: 3284 Opc = NVPTX::SULD_2D_V2I32_ZERO; 3285 break; 3286 case NVPTXISD::Suld2DV2I64Zero: 3287 Opc = NVPTX::SULD_2D_V2I64_ZERO; 3288 break; 3289 case NVPTXISD::Suld2DV4I8Zero: 3290 Opc = NVPTX::SULD_2D_V4I8_ZERO; 3291 break; 3292 case NVPTXISD::Suld2DV4I16Zero: 3293 Opc = NVPTX::SULD_2D_V4I16_ZERO; 3294 break; 3295 case NVPTXISD::Suld2DV4I32Zero: 3296 Opc = NVPTX::SULD_2D_V4I32_ZERO; 3297 break; 3298 case NVPTXISD::Suld2DArrayI8Zero: 3299 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; 3300 break; 3301 case NVPTXISD::Suld2DArrayI16Zero: 3302 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; 3303 break; 3304 case NVPTXISD::Suld2DArrayI32Zero: 3305 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; 3306 break; 3307 case NVPTXISD::Suld2DArrayI64Zero: 3308 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; 3309 break; 3310 case NVPTXISD::Suld2DArrayV2I8Zero: 3311 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; 3312 break; 3313 case NVPTXISD::Suld2DArrayV2I16Zero: 3314 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; 3315 break; 3316 case NVPTXISD::Suld2DArrayV2I32Zero: 3317 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; 3318 break; 3319 case NVPTXISD::Suld2DArrayV2I64Zero: 3320 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; 3321 break; 3322 case NVPTXISD::Suld2DArrayV4I8Zero: 3323 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; 3324 break; 3325 case NVPTXISD::Suld2DArrayV4I16Zero: 3326 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; 3327 break; 3328 case NVPTXISD::Suld2DArrayV4I32Zero: 3329 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; 3330 break; 3331 case NVPTXISD::Suld3DI8Zero: 3332 Opc = NVPTX::SULD_3D_I8_ZERO; 3333 break; 3334 case NVPTXISD::Suld3DI16Zero: 3335 Opc = NVPTX::SULD_3D_I16_ZERO; 3336 break; 3337 case NVPTXISD::Suld3DI32Zero: 3338 Opc = NVPTX::SULD_3D_I32_ZERO; 3339 break; 3340 case NVPTXISD::Suld3DI64Zero: 3341 Opc = NVPTX::SULD_3D_I64_ZERO; 3342 break; 3343 case NVPTXISD::Suld3DV2I8Zero: 3344 Opc = NVPTX::SULD_3D_V2I8_ZERO; 3345 break; 3346 case NVPTXISD::Suld3DV2I16Zero: 3347 Opc = NVPTX::SULD_3D_V2I16_ZERO; 3348 break; 3349 case NVPTXISD::Suld3DV2I32Zero: 3350 Opc = NVPTX::SULD_3D_V2I32_ZERO; 3351 break; 3352 case NVPTXISD::Suld3DV2I64Zero: 3353 Opc = NVPTX::SULD_3D_V2I64_ZERO; 3354 break; 3355 case NVPTXISD::Suld3DV4I8Zero: 3356 Opc = NVPTX::SULD_3D_V4I8_ZERO; 3357 break; 3358 case NVPTXISD::Suld3DV4I16Zero: 3359 Opc = NVPTX::SULD_3D_V4I16_ZERO; 3360 break; 3361 case NVPTXISD::Suld3DV4I32Zero: 3362 Opc = NVPTX::SULD_3D_V4I32_ZERO; 3363 break; 3364 } 3365 3366 // Copy over operands 3367 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end()); 3368 Ops.push_back(N->getOperand(0)); // Move chain to the back. 3369 3370 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); 3371 return true; 3372 } 3373 3374 3375 /// SelectBFE - Look for instruction sequences that can be made more efficient 3376 /// by using the 'bfe' (bit-field extract) PTX instruction 3377 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { 3378 SDLoc DL(N); 3379 SDValue LHS = N->getOperand(0); 3380 SDValue RHS = N->getOperand(1); 3381 SDValue Len; 3382 SDValue Start; 3383 SDValue Val; 3384 bool IsSigned = false; 3385 3386 if (N->getOpcode() == ISD::AND) { 3387 // Canonicalize the operands 3388 // We want 'and %val, %mask' 3389 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { 3390 std::swap(LHS, RHS); 3391 } 3392 3393 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); 3394 if (!Mask) { 3395 // We need a constant mask on the RHS of the AND 3396 return false; 3397 } 3398 3399 // Extract the mask bits 3400 uint64_t MaskVal = Mask->getZExtValue(); 3401 if (!isMask_64(MaskVal)) { 3402 // We *could* handle shifted masks here, but doing so would require an 3403 // 'and' operation to fix up the low-order bits so we would trade 3404 // shr+and for bfe+and, which has the same throughput 3405 return false; 3406 } 3407 3408 // How many bits are in our mask? 3409 uint64_t NumBits = countTrailingOnes(MaskVal); 3410 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 3411 3412 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { 3413 // We have a 'srl/and' pair, extract the effective start bit and length 3414 Val = LHS.getNode()->getOperand(0); 3415 Start = LHS.getNode()->getOperand(1); 3416 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); 3417 if (StartConst) { 3418 uint64_t StartVal = StartConst->getZExtValue(); 3419 // How many "good" bits do we have left? "good" is defined here as bits 3420 // that exist in the original value, not shifted in. 3421 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal; 3422 if (NumBits > GoodBits) { 3423 // Do not handle the case where bits have been shifted in. In theory 3424 // we could handle this, but the cost is likely higher than just 3425 // emitting the srl/and pair. 3426 return false; 3427 } 3428 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); 3429 } else { 3430 // Do not handle the case where the shift amount (can be zero if no srl 3431 // was found) is not constant. We could handle this case, but it would 3432 // require run-time logic that would be more expensive than just 3433 // emitting the srl/and pair. 3434 return false; 3435 } 3436 } else { 3437 // Do not handle the case where the LHS of the and is not a shift. While 3438 // it would be trivial to handle this case, it would just transform 3439 // 'and' -> 'bfe', but 'and' has higher-throughput. 3440 return false; 3441 } 3442 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { 3443 if (LHS->getOpcode() == ISD::AND) { 3444 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); 3445 if (!ShiftCnst) { 3446 // Shift amount must be constant 3447 return false; 3448 } 3449 3450 uint64_t ShiftAmt = ShiftCnst->getZExtValue(); 3451 3452 SDValue AndLHS = LHS->getOperand(0); 3453 SDValue AndRHS = LHS->getOperand(1); 3454 3455 // Canonicalize the AND to have the mask on the RHS 3456 if (isa<ConstantSDNode>(AndLHS)) { 3457 std::swap(AndLHS, AndRHS); 3458 } 3459 3460 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); 3461 if (!MaskCnst) { 3462 // Mask must be constant 3463 return false; 3464 } 3465 3466 uint64_t MaskVal = MaskCnst->getZExtValue(); 3467 uint64_t NumZeros; 3468 uint64_t NumBits; 3469 if (isMask_64(MaskVal)) { 3470 NumZeros = 0; 3471 // The number of bits in the result bitfield will be the number of 3472 // trailing ones (the AND) minus the number of bits we shift off 3473 NumBits = countTrailingOnes(MaskVal) - ShiftAmt; 3474 } else if (isShiftedMask_64(MaskVal)) { 3475 NumZeros = countTrailingZeros(MaskVal); 3476 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); 3477 // The number of bits in the result bitfield will be the number of 3478 // trailing zeros plus the number of set bits in the mask minus the 3479 // number of bits we shift off 3480 NumBits = NumZeros + NumOnes - ShiftAmt; 3481 } else { 3482 // This is not a mask we can handle 3483 return false; 3484 } 3485 3486 if (ShiftAmt < NumZeros) { 3487 // Handling this case would require extra logic that would make this 3488 // transformation non-profitable 3489 return false; 3490 } 3491 3492 Val = AndLHS; 3493 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); 3494 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 3495 } else if (LHS->getOpcode() == ISD::SHL) { 3496 // Here, we have a pattern like: 3497 // 3498 // (sra (shl val, NN), MM) 3499 // or 3500 // (srl (shl val, NN), MM) 3501 // 3502 // If MM >= NN, we can efficiently optimize this with bfe 3503 Val = LHS->getOperand(0); 3504 3505 SDValue ShlRHS = LHS->getOperand(1); 3506 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); 3507 if (!ShlCnst) { 3508 // Shift amount must be constant 3509 return false; 3510 } 3511 uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); 3512 3513 SDValue ShrRHS = RHS; 3514 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); 3515 if (!ShrCnst) { 3516 // Shift amount must be constant 3517 return false; 3518 } 3519 uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); 3520 3521 // To avoid extra codegen and be profitable, we need Outer >= Inner 3522 if (OuterShiftAmt < InnerShiftAmt) { 3523 return false; 3524 } 3525 3526 // If the outer shift is more than the type size, we have no bitfield to 3527 // extract (since we also check that the inner shift is <= the outer shift 3528 // then this also implies that the inner shift is < the type size) 3529 if (OuterShiftAmt >= Val.getValueSizeInBits()) { 3530 return false; 3531 } 3532 3533 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, 3534 MVT::i32); 3535 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt, 3536 DL, MVT::i32); 3537 3538 if (N->getOpcode() == ISD::SRA) { 3539 // If we have a arithmetic right shift, we need to use the signed bfe 3540 // variant 3541 IsSigned = true; 3542 } 3543 } else { 3544 // No can do... 3545 return false; 3546 } 3547 } else { 3548 // No can do... 3549 return false; 3550 } 3551 3552 3553 unsigned Opc; 3554 // For the BFE operations we form here from "and" and "srl", always use the 3555 // unsigned variants. 3556 if (Val.getValueType() == MVT::i32) { 3557 if (IsSigned) { 3558 Opc = NVPTX::BFE_S32rii; 3559 } else { 3560 Opc = NVPTX::BFE_U32rii; 3561 } 3562 } else if (Val.getValueType() == MVT::i64) { 3563 if (IsSigned) { 3564 Opc = NVPTX::BFE_S64rii; 3565 } else { 3566 Opc = NVPTX::BFE_U64rii; 3567 } 3568 } else { 3569 // We cannot handle this type 3570 return false; 3571 } 3572 3573 SDValue Ops[] = { 3574 Val, Start, Len 3575 }; 3576 3577 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops)); 3578 return true; 3579 } 3580 3581 // SelectDirectAddr - Match a direct address for DAG. 3582 // A direct address could be a globaladdress or externalsymbol. 3583 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { 3584 // Return true if TGA or ES. 3585 if (N.getOpcode() == ISD::TargetGlobalAddress || 3586 N.getOpcode() == ISD::TargetExternalSymbol) { 3587 Address = N; 3588 return true; 3589 } 3590 if (N.getOpcode() == NVPTXISD::Wrapper) { 3591 Address = N.getOperand(0); 3592 return true; 3593 } 3594 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol 3595 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) { 3596 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && 3597 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && 3598 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam) 3599 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address); 3600 } 3601 return false; 3602 } 3603 3604 // symbol+offset 3605 bool NVPTXDAGToDAGISel::SelectADDRsi_imp( 3606 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 3607 if (Addr.getOpcode() == ISD::ADD) { 3608 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 3609 SDValue base = Addr.getOperand(0); 3610 if (SelectDirectAddr(base, Base)) { 3611 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 3612 mvt); 3613 return true; 3614 } 3615 } 3616 } 3617 return false; 3618 } 3619 3620 // symbol+offset 3621 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, 3622 SDValue &Base, SDValue &Offset) { 3623 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); 3624 } 3625 3626 // symbol+offset 3627 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, 3628 SDValue &Base, SDValue &Offset) { 3629 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); 3630 } 3631 3632 // register+offset 3633 bool NVPTXDAGToDAGISel::SelectADDRri_imp( 3634 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 3635 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 3636 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 3637 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); 3638 return true; 3639 } 3640 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 3641 Addr.getOpcode() == ISD::TargetGlobalAddress) 3642 return false; // direct calls. 3643 3644 if (Addr.getOpcode() == ISD::ADD) { 3645 if (SelectDirectAddr(Addr.getOperand(0), Addr)) { 3646 return false; 3647 } 3648 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 3649 if (FrameIndexSDNode *FIN = 3650 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) 3651 // Constant offset from frame ref. 3652 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 3653 else 3654 Base = Addr.getOperand(0); 3655 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 3656 mvt); 3657 return true; 3658 } 3659 } 3660 return false; 3661 } 3662 3663 // register+offset 3664 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, 3665 SDValue &Base, SDValue &Offset) { 3666 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); 3667 } 3668 3669 // register+offset 3670 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, 3671 SDValue &Base, SDValue &Offset) { 3672 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); 3673 } 3674 3675 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, 3676 unsigned int spN) const { 3677 const Value *Src = nullptr; 3678 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { 3679 if (spN == 0 && mN->getMemOperand()->getPseudoValue()) 3680 return true; 3681 Src = mN->getMemOperand()->getValue(); 3682 } 3683 if (!Src) 3684 return false; 3685 if (auto *PT = dyn_cast<PointerType>(Src->getType())) 3686 return (PT->getAddressSpace() == spN); 3687 return false; 3688 } 3689 3690 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 3691 /// inline asm expressions. 3692 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( 3693 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 3694 SDValue Op0, Op1; 3695 switch (ConstraintID) { 3696 default: 3697 return true; 3698 case InlineAsm::Constraint_m: // memory 3699 if (SelectDirectAddr(Op, Op0)) { 3700 OutOps.push_back(Op0); 3701 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); 3702 return false; 3703 } 3704 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { 3705 OutOps.push_back(Op0); 3706 OutOps.push_back(Op1); 3707 return false; 3708 } 3709 break; 3710 } 3711 return true; 3712 } 3713 3714 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a 3715 /// conversion from \p SrcTy to \p DestTy. 3716 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, 3717 bool IsSigned) { 3718 switch (SrcTy.SimpleTy) { 3719 default: 3720 llvm_unreachable("Unhandled source type"); 3721 case MVT::i8: 3722 switch (DestTy.SimpleTy) { 3723 default: 3724 llvm_unreachable("Unhandled dest type"); 3725 case MVT::i16: 3726 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8; 3727 case MVT::i32: 3728 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8; 3729 case MVT::i64: 3730 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8; 3731 } 3732 case MVT::i16: 3733 switch (DestTy.SimpleTy) { 3734 default: 3735 llvm_unreachable("Unhandled dest type"); 3736 case MVT::i8: 3737 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16; 3738 case MVT::i32: 3739 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16; 3740 case MVT::i64: 3741 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16; 3742 } 3743 case MVT::i32: 3744 switch (DestTy.SimpleTy) { 3745 default: 3746 llvm_unreachable("Unhandled dest type"); 3747 case MVT::i8: 3748 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32; 3749 case MVT::i16: 3750 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32; 3751 case MVT::i64: 3752 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32; 3753 } 3754 case MVT::i64: 3755 switch (DestTy.SimpleTy) { 3756 default: 3757 llvm_unreachable("Unhandled dest type"); 3758 case MVT::i8: 3759 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64; 3760 case MVT::i16: 3761 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64; 3762 case MVT::i32: 3763 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64; 3764 } 3765 } 3766 } 3767