//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
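  // Note: when NEON/FP-ARMv8 is unavailable we only call computeTables() and
  // return below, so no legalization rules are declared for such subtargets
  // (see the FIXME above).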
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
      .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
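  // The custom action below is expanded by legalizeVaArg() (defined later in
  // this file), which emits the explicit loads, realignment, and va_list
  // pointer update by hand.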
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ)
      .legalForCartesianProduct(
          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
  // G_ADD_LOW instructions.
  // By splitting this here, we can optimize accesses in the small code model by
  // folding the G_ADD_LOW into the load/store offset.
  auto GV = MI.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  if (OpFlags & AArch64II::MO_GOT)
    return true;

  Register DstReg = MI.getOperand(0).getReg();
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, 0,
                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
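  // Otherwise, zero-extend the 32-bit constant amount to s64 so the imported
  // SelectionDAG patterns, which expect a 64-bit shift amount, can select the
  // immediate form.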
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, to be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
  } else {
    auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
    MIRBuilder.buildBitcast(ValReg, NewLoad);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  Align Alignment(MI.getOperand(2).getImm());
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  const Align PtrAlign = Align(PtrSize);
  auto List = MIRBuilder.buildLoad(
      PtrTy, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, PtrAlign));

  MachineInstrBuilder DstPtr;
  if (Alignment > PtrAlign) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 =
        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Alignment, PtrAlign)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(NewList, ListPtr,
                        *MF.getMachineMemOperand(MachinePointerInfo(),
                                                 MachineMemOperand::MOStore,
                                                 PtrSize, PtrAlign));

  MI.eraseFromParent();
  return true;
}