//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
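  // Note: on such subtargets we skip all of the rule definitions below and
  // just compute the tables before returning.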
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
      .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s64, s64},
                 {s32, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32 bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
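  // G_VAARG is custom-legalized in legalizeVaArg() below, which loads the
  // current va_list pointer, realigns it when the requested alignment exceeds
  // the slot alignment, loads the value, and stores back the bumped pointer.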
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1
                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
      // just want those lowered into G_BUILD_VECTOR.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
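    // Returning false here reports the custom legalization as failed, so the
    // LegalizerHelper gives up on the instruction.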
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target
  // ADRP + G_ADD_LOW instructions. By splitting this here, we can optimize
  // accesses in the small code model by folding the G_ADD_LOW into the
  // load/store offset.
  auto GV = MI.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  if (OpFlags & AArch64II::MO_GOT)
    return true;

  Register DstReg = MI.getOperand(0).getReg();
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, 0,
                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  int64_t Amount = VRegAndVal->Value;
  if (Amount > 31)
    return true; // This will have to remain a register variant.
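  // Materialize the amount as an s64 constant and swap it into the shift's
  // amount operand; the opcode itself is left unchanged.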
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. In order to allow further legalization of the inst, we
  // create a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
  } else {
    auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
    MIRBuilder.buildBitcast(ValReg, NewLoad);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  Align Alignment(MI.getOperand(2).getImm());
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  const Align PtrAlign = Align(PtrSize);
  auto List = MIRBuilder.buildLoad(
      PtrTy, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, PtrAlign));

  MachineInstrBuilder DstPtr;
  if (Alignment > PtrAlign) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 =
        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Alignment, PtrAlign)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(NewList, ListPtr,
                        *MF.getMachineMemOperand(MachinePointerInfo(),
                                                 MachineMemOperand::MOStore,
                                                 PtrSize, PtrAlign));

  MI.eraseFromParent();
  return true;
}