1 //===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting arm_neon.h, which includes 10 // a declaration and definition of each function specified by the ARM NEON 11 // compiler interface. See ARM document DUI0348B. 12 // 13 // Each NEON instruction is implemented in terms of 1 or more functions which 14 // are suffixed with the element type of the input vectors. Functions may be 15 // implemented in terms of generic vector operations such as +, *, -, etc. or 16 // by calling a __builtin_-prefixed function which will be handled by clang's 17 // CodeGen library. 18 // 19 // Additional validation code can be generated by this file when runHeader() is 20 // called, rather than the normal run() entry point. 21 // 22 // See also the documentation in include/clang/Basic/arm_neon.td. 
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/TableGen/AArch64ImmCheck.h" 37 #include "llvm/TableGen/Error.h" 38 #include "llvm/TableGen/Record.h" 39 #include "llvm/TableGen/SetTheory.h" 40 #include "llvm/TableGen/StringToOffsetTable.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <optional> 49 #include <set> 50 #include <sstream> 51 #include <string> 52 #include <unordered_map> 53 #include <utility> 54 #include <vector> 55 56 using namespace llvm; 57 58 namespace { 59 60 // While globals are generally bad, this one allows us to perform assertions 61 // liberally and somehow still trace them back to the def they indirectly 62 // came from. 63 static const Record *CurrentRecord = nullptr; 64 static void assert_with_loc(bool Assertion, const std::string &Str) { 65 if (!Assertion) { 66 if (CurrentRecord) 67 PrintFatalError(CurrentRecord->getLoc(), Str); 68 else 69 PrintFatalError(Str); 70 } 71 } 72 73 enum ClassKind { 74 ClassNone, 75 ClassI, // generic integer instruction, e.g., "i8" suffix 76 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 77 ClassW, // width-specific instruction, e.g., "8" suffix 78 ClassV, // void-suffix instruction, no suffix 79 ClassB, // bitcast arguments with enum argument to specify type 80 ClassL, // Logical instructions which are op instructions 81 // but we need to not emit any suffix for in our 82 // tests. 
83 ClassNoTest // Instructions which we do not test since they are 84 // not TRUE instructions. 85 }; 86 87 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 88 /// builtins. These must be kept in sync with the flags in 89 /// include/clang/Basic/TargetBuiltins.h. 90 namespace NeonTypeFlags { 91 92 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 93 94 enum EltType { 95 Int8, 96 Int16, 97 Int32, 98 Int64, 99 Poly8, 100 Poly16, 101 Poly64, 102 Poly128, 103 Float16, 104 Float32, 105 Float64, 106 BFloat16, 107 MFloat8 108 }; 109 110 } // end namespace NeonTypeFlags 111 112 class NeonEmitter; 113 114 //===----------------------------------------------------------------------===// 115 // TypeSpec 116 //===----------------------------------------------------------------------===// 117 118 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 119 /// for strong typing purposes. 120 /// 121 /// A TypeSpec can be used to create a type. 122 class TypeSpec : public std::string { 123 public: 124 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 125 std::vector<TypeSpec> Ret; 126 TypeSpec Acc; 127 for (char I : Str.str()) { 128 if (islower(I)) { 129 Acc.push_back(I); 130 Ret.push_back(TypeSpec(Acc)); 131 Acc.clear(); 132 } else { 133 Acc.push_back(I); 134 } 135 } 136 return Ret; 137 } 138 }; 139 140 //===----------------------------------------------------------------------===// 141 // Type 142 //===----------------------------------------------------------------------===// 143 144 /// A Type. Not much more to say here. 145 class Type { 146 private: 147 TypeSpec TS; 148 149 enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM }; 150 TypeKind Kind; 151 bool Immediate, Constant, Pointer; 152 // ScalarForMangling and NoManglingQ are really not suited to live here as 153 // they are not related to the type. 
But they live in the TypeSpec (not the 154 // prototype), so this is really the only place to store them. 155 bool ScalarForMangling, NoManglingQ; 156 unsigned Bitwidth, ElementBitwidth, NumVectors; 157 158 public: 159 Type() 160 : Kind(Void), Immediate(false), Constant(false), 161 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 162 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 163 164 Type(TypeSpec TS, StringRef CharMods) 165 : TS(std::move(TS)), Kind(Void), Immediate(false), 166 Constant(false), Pointer(false), ScalarForMangling(false), 167 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 168 applyModifiers(CharMods); 169 } 170 171 /// Returns a type representing "void". 172 static Type getVoid() { return Type(); } 173 174 bool operator==(const Type &Other) const { return str() == Other.str(); } 175 bool operator!=(const Type &Other) const { return !operator==(Other); } 176 177 // 178 // Query functions 179 // 180 bool isScalarForMangling() const { return ScalarForMangling; } 181 bool noManglingQ() const { return NoManglingQ; } 182 183 bool isPointer() const { return Pointer; } 184 bool isValue() const { return !isVoid() && !isPointer(); } 185 bool isScalar() const { return isValue() && NumVectors == 0; } 186 bool isVector() const { return isValue() && NumVectors > 0; } 187 bool isConstPointer() const { return Constant; } 188 bool isFloating() const { return Kind == Float; } 189 bool isInteger() const { return Kind == SInt || Kind == UInt; } 190 bool isPoly() const { return Kind == Poly; } 191 bool isSigned() const { return Kind == SInt; } 192 bool isImmediate() const { return Immediate; } 193 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 194 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 195 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 196 bool isChar() const { return ElementBitwidth == 8; } 197 bool isShort() const { return isInteger() && 
ElementBitwidth == 16; } 198 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 199 bool isLong() const { return isInteger() && ElementBitwidth == 64; } 200 bool isVoid() const { return Kind == Void; } 201 bool isBFloat16() const { return Kind == BFloat16; } 202 bool isMFloat8() const { return Kind == MFloat8; } 203 bool isFPM() const { return Kind == FPM; } 204 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 205 unsigned getSizeInBits() const { return Bitwidth; } 206 unsigned getElementSizeInBits() const { return ElementBitwidth; } 207 unsigned getNumVectors() const { return NumVectors; } 208 209 // 210 // Mutator functions 211 // 212 void makeUnsigned() { 213 assert(!isVoid() && "not a potentially signed type"); 214 Kind = UInt; 215 } 216 void makeSigned() { 217 assert(!isVoid() && "not a potentially signed type"); 218 Kind = SInt; 219 } 220 221 void makeInteger(unsigned ElemWidth, bool Sign) { 222 assert(!isVoid() && "converting void to int probably not useful"); 223 Kind = Sign ? SInt : UInt; 224 Immediate = false; 225 ElementBitwidth = ElemWidth; 226 } 227 228 void makeImmediate(unsigned ElemWidth) { 229 Kind = SInt; 230 Immediate = true; 231 ElementBitwidth = ElemWidth; 232 } 233 234 void makeScalar() { 235 Bitwidth = ElementBitwidth; 236 NumVectors = 0; 237 } 238 239 void makeOneVector() { 240 assert(isVector()); 241 NumVectors = 1; 242 } 243 244 void make32BitElement() { 245 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 246 ElementBitwidth = 32; 247 } 248 249 void doubleLanes() { 250 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 251 Bitwidth = 128; 252 } 253 254 void halveLanes() { 255 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 256 Bitwidth = 64; 257 } 258 259 /// Return the C string representation of a type, which is the typename 260 /// defined in stdint.h or arm_neon.h. 
261 std::string str() const; 262 263 /// Return the string representation of a type, which is an encoded 264 /// string for passing to the BUILTIN() macro in Builtins.def. 265 std::string builtin_str() const; 266 267 /// Return the value in NeonTypeFlags for this type. 268 unsigned getNeonEnum() const; 269 270 /// Parse a type from a stdint.h or arm_neon.h typedef name, 271 /// for example uint32x2_t or int64_t. 272 static Type fromTypedefName(StringRef Name); 273 274 private: 275 /// Creates the type based on the typespec string in TS. 276 /// Sets "Quad" to true if the "Q" or "H" modifiers were 277 /// seen. This is needed by applyModifier as some modifiers 278 /// only take effect if the type size was changed by "Q" or "H". 279 void applyTypespec(bool &Quad); 280 /// Applies prototype modifiers to the type. 281 void applyModifiers(StringRef Mods); 282 }; 283 284 //===----------------------------------------------------------------------===// 285 // Variable 286 //===----------------------------------------------------------------------===// 287 288 /// A variable is a simple class that just has a type and a name. 289 class Variable { 290 Type T; 291 std::string N; 292 293 public: 294 Variable() : T(Type::getVoid()) {} 295 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 296 297 Type getType() const { return T; } 298 std::string getName() const { return "__" + N; } 299 }; 300 301 //===----------------------------------------------------------------------===// 302 // Intrinsic 303 //===----------------------------------------------------------------------===// 304 305 /// The main grunt class. This represents an instantiation of an intrinsic with 306 /// a particular typespec and prototype. 307 class Intrinsic { 308 /// The Record this intrinsic was created from. 309 const Record *R; 310 /// The unmangled name. 311 std::string Name; 312 /// The input and output typespecs. 
InTS == OutTS except when 313 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 314 TypeSpec OutTS, InTS; 315 /// The base class kind. Most intrinsics use ClassS, which has full type 316 /// info for integers (s32/u32). Some use ClassI, which doesn't care about 317 /// signedness (i32), while some (ClassB) have no type at all, only a width 318 /// (32). 319 ClassKind CK; 320 /// The list of DAGs for the body. May be empty, in which case we should 321 /// emit a builtin call. 322 const ListInit *Body; 323 /// The architectural ifdef guard. 324 std::string ArchGuard; 325 /// The architectural target() guard. 326 std::string TargetGuard; 327 /// Set if the Unavailable bit is 1. This means we don't generate a body, 328 /// just an "unavailable" attribute on a declaration. 329 bool IsUnavailable; 330 /// Is this intrinsic safe for big-endian? or does it need its arguments 331 /// reversing? 332 bool BigEndianSafe; 333 334 /// The types of return value [0] and parameters [1..]. 335 std::vector<Type> Types; 336 337 SmallVector<ImmCheck, 2> ImmChecks; 338 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 339 int PolymorphicKeyType; 340 /// The local variables defined. 341 std::map<std::string, Variable, std::less<>> Variables; 342 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 343 bool NeededEarly; 344 /// UseMacro - set if we should implement using a macro or unset for a 345 /// function. 346 bool UseMacro; 347 /// The set of intrinsics that this intrinsic uses/requires. 348 std::set<Intrinsic *> Dependencies; 349 /// The "base type", which is Type('d', OutTS). InBaseType is only 350 /// different if CartesianProductWith is non-empty (for vreinterpret). 351 Type BaseType, InBaseType; 352 /// The return variable. 353 Variable RetVar; 354 /// A postfix to apply to every variable. Defaults to "". 
355 std::string VariablePostfix; 356 357 NeonEmitter &Emitter; 358 std::stringstream OS; 359 360 bool isBigEndianSafe() const { 361 if (BigEndianSafe) 362 return true; 363 364 for (const auto &T : Types){ 365 if (T.isVector() && T.getNumElements() > 1) 366 return false; 367 } 368 return true; 369 } 370 371 public: 372 Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 373 TypeSpec InTS, ClassKind CK, const ListInit *Body, 374 NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard, 375 bool IsUnavailable, bool BigEndianSafe) 376 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 377 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), 378 IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe), 379 PolymorphicKeyType(0), NeededEarly(false), UseMacro(false), 380 BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) { 381 // Modify the TypeSpec per-argument to get a concrete Type, and create 382 // known variables for each. 383 // Types[0] is the return value. 384 unsigned Pos = 0; 385 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 386 StringRef Mods = getNextModifiers(Proto, Pos); 387 while (!Mods.empty()) { 388 Types.emplace_back(InTS, Mods); 389 if (Mods.contains('!')) 390 PolymorphicKeyType = Types.size() - 1; 391 392 Mods = getNextModifiers(Proto, Pos); 393 } 394 395 for (const auto &Type : Types) { 396 // If this builtin takes an immediate argument, we need to #define it rather 397 // than use a standard declaration, so that SemaChecking can range check 398 // the immediate passed by the user. 399 400 // Pointer arguments need to use macros to avoid hiding aligned attributes 401 // from the pointer type. 402 403 // It is not permitted to pass or return an __fp16 by value, so intrinsics 404 // taking a scalar float16_t must be implemented as macros. 
405 if (Type.isImmediate() || Type.isPointer() || 406 (Type.isScalar() && Type.isHalf())) 407 UseMacro = true; 408 } 409 410 int ArgIdx, Kind, TypeArgIdx; 411 for (const Record *I : R->getValueAsListOfDefs("ImmChecks")) { 412 unsigned EltSizeInBits = 0, VecSizeInBits = 0; 413 414 ArgIdx = I->getValueAsInt("ImmArgIdx"); 415 TypeArgIdx = I->getValueAsInt("TypeContextArgIdx"); 416 Kind = I->getValueAsDef("Kind")->getValueAsInt("Value"); 417 418 assert((ArgIdx >= 0 && Kind >= 0) && 419 "ImmArgIdx and Kind must be nonnegative"); 420 421 if (TypeArgIdx >= 0) { 422 Type ContextType = getParamType(TypeArgIdx); 423 424 // Element size cannot be set for intrinscs that map to polymorphic 425 // builtins. 426 if (CK != ClassB) 427 EltSizeInBits = ContextType.getElementSizeInBits(); 428 429 VecSizeInBits = ContextType.getSizeInBits(); 430 } 431 432 ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits); 433 } 434 sort(ImmChecks.begin(), ImmChecks.end(), 435 [](const ImmCheck &a, const ImmCheck &b) { 436 return a.getImmArgIdx() < b.getImmArgIdx(); 437 }); // Sort for comparison with other intrinsics which map to the 438 // same builtin 439 } 440 441 /// Get the Record that this intrinsic is based off. 442 const Record *getRecord() const { return R; } 443 /// Get the set of Intrinsics that this intrinsic calls. 444 /// this is the set of immediate dependencies, NOT the 445 /// transitive closure. 446 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 447 /// Get the architectural guard string (#ifdef). 448 std::string getArchGuard() const { return ArchGuard; } 449 std::string getTargetGuard() const { return TargetGuard; } 450 ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; } 451 /// Get the non-mangled name. 452 std::string getName() const { return Name; } 453 454 /// Return true if the intrinsic takes an immediate operand. 
455 bool hasImmediate() const { 456 return any_of(Types, [](const Type &T) { return T.isImmediate(); }); 457 } 458 459 // Return if the supplied argument is an immediate 460 bool isArgImmediate(unsigned idx) const { 461 return Types[idx + 1].isImmediate(); 462 } 463 464 unsigned getNumParams() const { return Types.size() - 1; } 465 Type getReturnType() const { return Types[0]; } 466 Type getParamType(unsigned I) const { return Types[I + 1]; } 467 Type getBaseType() const { return BaseType; } 468 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 469 470 /// Return true if the prototype has a scalar argument. 471 bool protoHasScalar() const; 472 473 /// Return the index that parameter PIndex will sit at 474 /// in a generated function call. This is often just PIndex, 475 /// but may not be as things such as multiple-vector operands 476 /// and sret parameters need to be taken into account. 477 unsigned getGeneratedParamIdx(unsigned PIndex) { 478 unsigned Idx = 0; 479 if (getReturnType().getNumVectors() > 1) 480 // Multiple vectors are passed as sret. 481 ++Idx; 482 483 for (unsigned I = 0; I < PIndex; ++I) 484 Idx += std::max(1U, getParamType(I).getNumVectors()); 485 486 return Idx; 487 } 488 489 bool hasBody() const { return Body && !Body->empty(); } 490 491 void setNeededEarly() { NeededEarly = true; } 492 493 bool operator<(const Intrinsic &Other) const { 494 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name) 495 return std::tie(ArchGuard, TargetGuard, Name) < 496 std::tie(Other.ArchGuard, Other.TargetGuard, Other.Name); 497 } 498 499 ClassKind getClassKind(bool UseClassBIfScalar = false) { 500 if (UseClassBIfScalar && !protoHasScalar()) 501 return ClassB; 502 return CK; 503 } 504 505 /// Return the name, mangled with type information. 506 /// If ForceClassS is true, use ClassS (u32/s32) instead 507 /// of the intrinsic's own type class. 
508 std::string getMangledName(bool ForceClassS = false) const; 509 /// Return the type code for a builtin function call. 510 std::string getInstTypeCode(Type T, ClassKind CK) const; 511 /// Return the type string for a BUILTIN() macro in Builtins.def. 512 std::string getBuiltinTypeStr(); 513 514 /// Generate the intrinsic, returning code. 515 std::string generate(); 516 /// Perform type checking and populate the dependency graph, but 517 /// don't generate code yet. 518 void indexBody(); 519 520 private: 521 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 522 523 std::string mangleName(std::string Name, ClassKind CK) const; 524 525 void initVariables(); 526 std::string replaceParamsIn(std::string S); 527 528 void emitBodyAsBuiltinCall(); 529 530 void generateImpl(bool ReverseArguments, 531 StringRef NamePrefix, StringRef CallPrefix); 532 void emitReturn(); 533 void emitBody(StringRef CallPrefix); 534 void emitShadowedArgs(); 535 void emitArgumentReversal(); 536 void emitReturnVarDecl(); 537 void emitReturnReversal(); 538 void emitReverseVariable(Variable &Dest, Variable &Src); 539 void emitNewLine(); 540 void emitClosingBrace(); 541 void emitOpeningBrace(); 542 void emitPrototype(StringRef NamePrefix); 543 544 class DagEmitter { 545 Intrinsic &Intr; 546 StringRef CallPrefix; 547 548 public: 549 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 550 Intr(Intr), CallPrefix(CallPrefix) { 551 } 552 std::pair<Type, std::string> emitDagArg(const Init *Arg, 553 std::string ArgName); 554 std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI); 555 std::pair<Type, std::string> emitDagSplat(const DagInit *DI); 556 std::pair<Type, std::string> emitDagDup(const DagInit *DI); 557 std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI); 558 std::pair<Type, std::string> emitDagShuffle(const DagInit *DI); 559 std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast); 560 std::pair<Type, std::string> emitDagCall(const DagInit 
*DI, 561 bool MatchMangledName); 562 std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI); 563 std::pair<Type, std::string> emitDagLiteral(const DagInit *DI); 564 std::pair<Type, std::string> emitDagOp(const DagInit *DI); 565 std::pair<Type, std::string> emitDag(const DagInit *DI); 566 }; 567 }; 568 569 //===----------------------------------------------------------------------===// 570 // NeonEmitter 571 //===----------------------------------------------------------------------===// 572 573 class NeonEmitter { 574 const RecordKeeper &Records; 575 DenseMap<const Record *, ClassKind> ClassMap; 576 std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap; 577 unsigned UniqueNumber; 578 579 void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out); 580 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 581 void genStreamingSVECompatibleList(raw_ostream &OS, 582 SmallVectorImpl<Intrinsic *> &Defs); 583 void genOverloadTypeCheckCode(raw_ostream &OS, 584 SmallVectorImpl<Intrinsic *> &Defs); 585 bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA, 586 const ArrayRef<ImmCheck> ChecksB); 587 void genIntrinsicRangeCheckCode(raw_ostream &OS, 588 SmallVectorImpl<Intrinsic *> &Defs); 589 590 public: 591 /// Called by Intrinsic - this attempts to get an intrinsic that takes 592 /// the given types as arguments. 593 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 594 std::optional<std::string> MangledName); 595 596 /// Called by Intrinsic - returns a globally-unique number. 
597 unsigned getUniqueNumber() { return UniqueNumber++; } 598 599 NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) { 600 const Record *SI = R.getClass("SInst"); 601 const Record *II = R.getClass("IInst"); 602 const Record *WI = R.getClass("WInst"); 603 const Record *VI = R.getClass("VInst"); 604 const Record *SOpI = R.getClass("SOpInst"); 605 const Record *IOpI = R.getClass("IOpInst"); 606 const Record *WOpI = R.getClass("WOpInst"); 607 const Record *LOpI = R.getClass("LOpInst"); 608 const Record *NoTestOpI = R.getClass("NoTestOpInst"); 609 610 ClassMap[SI] = ClassS; 611 ClassMap[II] = ClassI; 612 ClassMap[WI] = ClassW; 613 ClassMap[VI] = ClassV; 614 ClassMap[SOpI] = ClassS; 615 ClassMap[IOpI] = ClassI; 616 ClassMap[WOpI] = ClassW; 617 ClassMap[LOpI] = ClassL; 618 ClassMap[NoTestOpI] = ClassNoTest; 619 } 620 621 // Emit arm_neon.h.inc 622 void run(raw_ostream &o); 623 624 // Emit arm_fp16.h.inc 625 void runFP16(raw_ostream &o); 626 627 // Emit arm_bf16.h.inc 628 void runBF16(raw_ostream &o); 629 630 void runVectorTypes(raw_ostream &o); 631 632 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 633 // arm_bf16.h 634 void runHeader(raw_ostream &o); 635 }; 636 637 } // end anonymous namespace 638 639 //===----------------------------------------------------------------------===// 640 // Type implementation 641 //===----------------------------------------------------------------------===// 642 643 std::string Type::str() const { 644 if (isVoid()) 645 return "void"; 646 if (isFPM()) 647 return "fpm_t"; 648 649 std::string S; 650 651 if (isInteger() && !isSigned()) 652 S += "u"; 653 654 if (isPoly()) 655 S += "poly"; 656 else if (isFloating()) 657 S += "float"; 658 else if (isBFloat16()) 659 S += "bfloat"; 660 else if (isMFloat8()) 661 S += "mfloat"; 662 else 663 S += "int"; 664 665 S += utostr(ElementBitwidth); 666 if (isVector()) 667 S += "x" + utostr(getNumElements()); 668 if (NumVectors > 1) 669 S += "x" + utostr(NumVectors); 670 
S += "_t"; 671 672 if (Constant) 673 S += " const"; 674 if (Pointer) 675 S += " *"; 676 677 return S; 678 } 679 680 std::string Type::builtin_str() const { 681 std::string S; 682 if (isVoid()) 683 return "v"; 684 685 if (isPointer()) { 686 // All pointers are void pointers. 687 S = "v"; 688 if (isConstPointer()) 689 S += "C"; 690 S += "*"; 691 return S; 692 } else if (isInteger()) 693 switch (ElementBitwidth) { 694 case 8: S += "c"; break; 695 case 16: S += "s"; break; 696 case 32: S += "i"; break; 697 case 64: S += "Wi"; break; 698 case 128: S += "LLLi"; break; 699 default: llvm_unreachable("Unhandled case!"); 700 } 701 else if (isBFloat16()) { 702 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 703 S += "y"; 704 } else if (isMFloat8()) { 705 assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits"); 706 S += "m"; 707 } else if (isFPM()) { 708 S += "UWi"; 709 } else 710 switch (ElementBitwidth) { 711 case 16: S += "h"; break; 712 case 32: S += "f"; break; 713 case 64: S += "d"; break; 714 default: llvm_unreachable("Unhandled case!"); 715 } 716 717 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 718 if (isChar() && !isPointer() && isSigned()) 719 // Make chars explicitly signed. 720 S = "S" + S; 721 else if (isInteger() && !isSigned()) 722 S = "U" + S; 723 724 // Constant indices are "int", but have the "constant expression" modifier. 
725 if (isImmediate()) { 726 assert(isInteger() && isSigned()); 727 S = "I" + S; 728 } 729 730 if (isScalar()) 731 return S; 732 733 std::string Ret; 734 for (unsigned I = 0; I < NumVectors; ++I) 735 Ret += "V" + utostr(getNumElements()) + S; 736 737 return Ret; 738 } 739 740 unsigned Type::getNeonEnum() const { 741 unsigned Addend; 742 switch (ElementBitwidth) { 743 case 8: Addend = 0; break; 744 case 16: Addend = 1; break; 745 case 32: Addend = 2; break; 746 case 64: Addend = 3; break; 747 case 128: Addend = 4; break; 748 default: llvm_unreachable("Unhandled element bitwidth!"); 749 } 750 751 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 752 if (isPoly()) { 753 // Adjustment needed because Poly32 doesn't exist. 754 if (Addend >= 2) 755 --Addend; 756 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 757 } 758 if (isFloating()) { 759 assert(Addend != 0 && "Float8 doesn't exist!"); 760 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 761 } 762 763 if (isBFloat16()) { 764 assert(Addend == 1 && "BFloat16 is only 16 bit"); 765 Base = (unsigned)NeonTypeFlags::BFloat16; 766 } 767 768 if (isMFloat8()) { 769 Base = (unsigned)NeonTypeFlags::MFloat8; 770 } 771 772 if (Bitwidth == 128) 773 Base |= (unsigned)NeonTypeFlags::QuadFlag; 774 if (isInteger() && !isSigned()) 775 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 776 777 return Base; 778 } 779 780 Type Type::fromTypedefName(StringRef Name) { 781 Type T; 782 T.Kind = SInt; 783 784 if (Name.consume_front("u")) 785 T.Kind = UInt; 786 787 if (Name.consume_front("float")) { 788 T.Kind = Float; 789 } else if (Name.consume_front("poly")) { 790 T.Kind = Poly; 791 } else if (Name.consume_front("bfloat")) { 792 T.Kind = BFloat16; 793 } else if (Name.consume_front("mfloat")) { 794 T.Kind = MFloat8; 795 } else { 796 assert(Name.starts_with("int")); 797 Name = Name.drop_front(3); 798 } 799 800 unsigned I = 0; 801 for (I = 0; I < Name.size(); ++I) { 802 if (!isdigit(Name[I])) 803 break; 804 } 805 Name.substr(0, 
I).getAsInteger(10, T.ElementBitwidth); 806 Name = Name.drop_front(I); 807 808 T.Bitwidth = T.ElementBitwidth; 809 T.NumVectors = 1; 810 811 if (Name.consume_front("x")) { 812 unsigned I = 0; 813 for (I = 0; I < Name.size(); ++I) { 814 if (!isdigit(Name[I])) 815 break; 816 } 817 unsigned NumLanes; 818 Name.substr(0, I).getAsInteger(10, NumLanes); 819 Name = Name.drop_front(I); 820 T.Bitwidth = T.ElementBitwidth * NumLanes; 821 } else { 822 // Was scalar. 823 T.NumVectors = 0; 824 } 825 if (Name.consume_front("x")) { 826 unsigned I = 0; 827 for (I = 0; I < Name.size(); ++I) { 828 if (!isdigit(Name[I])) 829 break; 830 } 831 Name.substr(0, I).getAsInteger(10, T.NumVectors); 832 Name = Name.drop_front(I); 833 } 834 835 assert(Name.starts_with("_t") && "Malformed typedef!"); 836 return T; 837 } 838 839 void Type::applyTypespec(bool &Quad) { 840 std::string S = TS; 841 ScalarForMangling = false; 842 Kind = SInt; 843 ElementBitwidth = ~0U; 844 NumVectors = 1; 845 846 for (char I : S) { 847 switch (I) { 848 case 'S': 849 ScalarForMangling = true; 850 break; 851 case 'H': 852 NoManglingQ = true; 853 Quad = true; 854 break; 855 case 'Q': 856 Quad = true; 857 break; 858 case 'P': 859 Kind = Poly; 860 break; 861 case 'U': 862 Kind = UInt; 863 break; 864 case 'c': 865 ElementBitwidth = 8; 866 break; 867 case 'h': 868 Kind = Float; 869 [[fallthrough]]; 870 case 's': 871 ElementBitwidth = 16; 872 break; 873 case 'f': 874 Kind = Float; 875 [[fallthrough]]; 876 case 'i': 877 ElementBitwidth = 32; 878 break; 879 case 'd': 880 Kind = Float; 881 [[fallthrough]]; 882 case 'l': 883 ElementBitwidth = 64; 884 break; 885 case 'k': 886 ElementBitwidth = 128; 887 // Poly doesn't have a 128x1 type. 
888 if (isPoly()) 889 NumVectors = 0; 890 break; 891 case 'b': 892 Kind = BFloat16; 893 ElementBitwidth = 16; 894 break; 895 case 'm': 896 Kind = MFloat8; 897 ElementBitwidth = 8; 898 break; 899 default: 900 llvm_unreachable("Unhandled type code!"); 901 } 902 } 903 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 904 905 Bitwidth = Quad ? 128 : 64; 906 } 907 908 void Type::applyModifiers(StringRef Mods) { 909 bool AppliedQuad = false; 910 applyTypespec(AppliedQuad); 911 912 for (char Mod : Mods) { 913 switch (Mod) { 914 case '.': 915 break; 916 case 'v': 917 Kind = Void; 918 break; 919 case 'S': 920 Kind = SInt; 921 break; 922 case 'U': 923 Kind = UInt; 924 break; 925 case 'B': 926 Kind = BFloat16; 927 ElementBitwidth = 16; 928 break; 929 case 'F': 930 Kind = Float; 931 break; 932 case 'P': 933 Kind = Poly; 934 break; 935 case 'V': 936 Kind = FPM; 937 Bitwidth = ElementBitwidth = 64; 938 NumVectors = 0; 939 Immediate = Constant = Pointer = false; 940 ScalarForMangling = NoManglingQ = true; 941 break; 942 case '>': 943 assert(ElementBitwidth < 128); 944 ElementBitwidth *= 2; 945 break; 946 case '<': 947 assert(ElementBitwidth > 8); 948 ElementBitwidth /= 2; 949 break; 950 case '1': 951 NumVectors = 0; 952 break; 953 case '2': 954 NumVectors = 2; 955 break; 956 case '3': 957 NumVectors = 3; 958 break; 959 case '4': 960 NumVectors = 4; 961 break; 962 case '*': 963 Pointer = true; 964 break; 965 case 'c': 966 Constant = true; 967 break; 968 case 'Q': 969 Bitwidth = 128; 970 break; 971 case 'q': 972 Bitwidth = 64; 973 break; 974 case 'I': 975 Kind = SInt; 976 ElementBitwidth = Bitwidth = 32; 977 NumVectors = 0; 978 Immediate = true; 979 break; 980 case 'p': 981 if (isPoly()) 982 Kind = UInt; 983 break; 984 case '!': 985 // Key type, handled elsewhere. 
986 break; 987 default: 988 llvm_unreachable("Unhandled character!"); 989 } 990 } 991 } 992 993 //===----------------------------------------------------------------------===// 994 // Intrinsic implementation 995 //===----------------------------------------------------------------------===// 996 997 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 998 if (Proto.size() == Pos) 999 return StringRef(); 1000 else if (Proto[Pos] != '(') 1001 return Proto.substr(Pos++, 1); 1002 1003 size_t Start = Pos + 1; 1004 size_t End = Proto.find(')', Start); 1005 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 1006 Pos = End + 1; 1007 return Proto.slice(Start, End); 1008 } 1009 1010 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 1011 char typeCode = '\0'; 1012 bool printNumber = true; 1013 1014 if (CK == ClassB && TargetGuard == "neon") 1015 return ""; 1016 1017 if (this->CK == ClassV) 1018 return ""; 1019 1020 if (T.isBFloat16()) 1021 return "bf16"; 1022 1023 if (T.isMFloat8()) 1024 return "mf8"; 1025 1026 if (T.isPoly()) 1027 typeCode = 'p'; 1028 else if (T.isInteger()) 1029 typeCode = T.isSigned() ? 
's' : 'u'; 1030 else 1031 typeCode = 'f'; 1032 1033 if (CK == ClassI) { 1034 switch (typeCode) { 1035 default: 1036 break; 1037 case 's': 1038 case 'u': 1039 case 'p': 1040 typeCode = 'i'; 1041 break; 1042 } 1043 } 1044 if (CK == ClassB && TargetGuard == "neon") { 1045 typeCode = '\0'; 1046 } 1047 1048 std::string S; 1049 if (typeCode != '\0') 1050 S.push_back(typeCode); 1051 if (printNumber) 1052 S += utostr(T.getElementSizeInBits()); 1053 1054 return S; 1055 } 1056 1057 std::string Intrinsic::getBuiltinTypeStr() { 1058 ClassKind LocalCK = getClassKind(true); 1059 std::string S; 1060 1061 Type RetT = getReturnType(); 1062 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1063 !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8()) 1064 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1065 1066 // Since the return value must be one type, return a vector type of the 1067 // appropriate width which we will bitcast. An exception is made for 1068 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1069 // fashion, storing them to a pointer arg. 1070 if (RetT.getNumVectors() > 1) { 1071 S += "vv*"; // void result with void* first argument 1072 } else { 1073 if (RetT.isPoly()) 1074 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1075 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1076 RetT.makeSigned(); 1077 1078 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1079 // Cast to vector of 8-bit elements. 1080 RetT.makeInteger(8, true); 1081 1082 S += RetT.builtin_str(); 1083 } 1084 1085 for (unsigned I = 0; I < getNumParams(); ++I) { 1086 Type T = getParamType(I); 1087 if (T.isPoly()) 1088 T.makeInteger(T.getElementSizeInBits(), false); 1089 1090 if (LocalCK == ClassB && !T.isScalar()) 1091 T.makeInteger(8, true); 1092 // Halves always get converted to 8-bit elements. 
1093 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1094 T.makeInteger(8, true); 1095 1096 if (LocalCK == ClassI && T.isInteger()) 1097 T.makeSigned(); 1098 1099 if (isArgImmediate(I)) 1100 T.makeImmediate(32); 1101 1102 S += T.builtin_str(); 1103 } 1104 1105 // Extra constant integer to hold type class enum for this function, e.g. s8 1106 if (LocalCK == ClassB) 1107 S += "i"; 1108 1109 return S; 1110 } 1111 1112 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1113 // Check if the prototype has a scalar operand with the type of the vector 1114 // elements. If not, bitcasting the args will take care of arg checking. 1115 // The actual signedness etc. will be taken care of with special enums. 1116 ClassKind LocalCK = CK; 1117 if (!protoHasScalar()) 1118 LocalCK = ClassB; 1119 1120 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1121 } 1122 1123 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1124 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1125 std::string S = Name; 1126 1127 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1128 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1129 Name == "vcvt_f32_bf16") 1130 return Name; 1131 1132 if (!typeCode.empty()) { 1133 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1134 if (Name.size() >= 3 && isdigit(Name.back()) && 1135 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1136 S.insert(S.length() - 3, "_" + typeCode); 1137 else 1138 S += "_" + typeCode; 1139 } 1140 1141 if (BaseType != InBaseType) { 1142 // A reinterpret - out the input base type at the end. 1143 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1144 } 1145 1146 if (LocalCK == ClassB && TargetGuard == "neon") 1147 S += "_v"; 1148 1149 // Insert a 'q' before the first '_' character so that it ends up before 1150 // _lane or _n on vector-scalar operations. 
1151 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1152 size_t Pos = S.find('_'); 1153 S.insert(Pos, "q"); 1154 } 1155 1156 char Suffix = '\0'; 1157 if (BaseType.isScalarForMangling()) { 1158 switch (BaseType.getElementSizeInBits()) { 1159 case 8: Suffix = 'b'; break; 1160 case 16: Suffix = 'h'; break; 1161 case 32: Suffix = 's'; break; 1162 case 64: Suffix = 'd'; break; 1163 default: llvm_unreachable("Bad suffix!"); 1164 } 1165 } 1166 if (Suffix != '\0') { 1167 size_t Pos = S.find('_'); 1168 S.insert(Pos, &Suffix, 1); 1169 } 1170 1171 return S; 1172 } 1173 1174 std::string Intrinsic::replaceParamsIn(std::string S) { 1175 while (S.find('$') != std::string::npos) { 1176 size_t Pos = S.find('$'); 1177 size_t End = Pos + 1; 1178 while (isalpha(S[End])) 1179 ++End; 1180 1181 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1182 assert_with_loc(Variables.find(VarName) != Variables.end(), 1183 "Variable not defined!"); 1184 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1185 } 1186 1187 return S; 1188 } 1189 1190 void Intrinsic::initVariables() { 1191 Variables.clear(); 1192 1193 // Modify the TypeSpec per-argument to get a concrete Type, and create 1194 // known variables for each. 
1195 for (unsigned I = 1; I < Types.size(); ++I) { 1196 char NameC = '0' + (I - 1); 1197 std::string Name = "p"; 1198 Name.push_back(NameC); 1199 1200 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1201 } 1202 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1203 } 1204 1205 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1206 if (UseMacro) { 1207 OS << "#define "; 1208 } else { 1209 OS << "__ai "; 1210 if (TargetGuard != "") 1211 OS << "__attribute__((target(\"" << TargetGuard << "\"))) "; 1212 OS << Types[0].str() << " "; 1213 } 1214 1215 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1216 1217 for (unsigned I = 0; I < getNumParams(); ++I) { 1218 if (I != 0) 1219 OS << ", "; 1220 1221 char NameC = '0' + I; 1222 std::string Name = "p"; 1223 Name.push_back(NameC); 1224 assert(Variables.find(Name) != Variables.end()); 1225 Variable &V = Variables[Name]; 1226 1227 if (!UseMacro) 1228 OS << V.getType().str() << " "; 1229 OS << V.getName(); 1230 } 1231 1232 OS << ")"; 1233 } 1234 1235 void Intrinsic::emitOpeningBrace() { 1236 if (UseMacro) 1237 OS << " __extension__ ({"; 1238 else 1239 OS << " {"; 1240 emitNewLine(); 1241 } 1242 1243 void Intrinsic::emitClosingBrace() { 1244 if (UseMacro) 1245 OS << "})"; 1246 else 1247 OS << "}"; 1248 } 1249 1250 void Intrinsic::emitNewLine() { 1251 if (UseMacro) 1252 OS << " \\\n"; 1253 else 1254 OS << "\n"; 1255 } 1256 1257 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1258 if (Dest.getType().getNumVectors() > 1) { 1259 emitNewLine(); 1260 1261 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1262 OS << " " << Dest.getName() << ".val[" << K << "] = " 1263 << "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], " 1264 << Src.getName() << ".val[" << K << "], __lane_reverse_" 1265 << Dest.getType().getSizeInBits() << "_" 1266 << Dest.getType().getElementSizeInBits() << ");"; 1267 emitNewLine(); 1268 } 1269 } else { 1270 OS << " " << Dest.getName() 
<< " = __builtin_shufflevector(" 1271 << Src.getName() << ", " << Src.getName() << ", __lane_reverse_" 1272 << Dest.getType().getSizeInBits() << "_" 1273 << Dest.getType().getElementSizeInBits() << ");"; 1274 emitNewLine(); 1275 } 1276 } 1277 1278 void Intrinsic::emitArgumentReversal() { 1279 if (isBigEndianSafe()) 1280 return; 1281 1282 // Reverse all vector arguments. 1283 for (unsigned I = 0; I < getNumParams(); ++I) { 1284 std::string Name = "p" + utostr(I); 1285 std::string NewName = "rev" + utostr(I); 1286 1287 Variable &V = Variables[Name]; 1288 Variable NewV(V.getType(), NewName + VariablePostfix); 1289 1290 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1291 continue; 1292 1293 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1294 emitReverseVariable(NewV, V); 1295 V = NewV; 1296 } 1297 } 1298 1299 void Intrinsic::emitReturnVarDecl() { 1300 assert(RetVar.getType() == Types[0]); 1301 // Create a return variable, if we're not void. 1302 if (!RetVar.getType().isVoid()) { 1303 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1304 emitNewLine(); 1305 } 1306 } 1307 1308 void Intrinsic::emitReturnReversal() { 1309 if (isBigEndianSafe()) 1310 return; 1311 if (!getReturnType().isVector() || getReturnType().isVoid() || 1312 getReturnType().getNumElements() == 1) 1313 return; 1314 emitReverseVariable(RetVar, RetVar); 1315 } 1316 1317 void Intrinsic::emitShadowedArgs() { 1318 // Macro arguments are not type-checked like inline function arguments, 1319 // so assign them to local temporaries to get the right type checking. 1320 if (!UseMacro) 1321 return; 1322 1323 for (unsigned I = 0; I < getNumParams(); ++I) { 1324 // Do not create a temporary for an immediate argument. 1325 // That would defeat the whole point of using a macro! 1326 if (getParamType(I).isImmediate()) 1327 continue; 1328 // Do not create a temporary for pointer arguments. The input 1329 // pointer may have an alignment hint. 
1330 if (getParamType(I).isPointer()) 1331 continue; 1332 1333 std::string Name = "p" + utostr(I); 1334 1335 assert(Variables.find(Name) != Variables.end()); 1336 Variable &V = Variables[Name]; 1337 1338 std::string NewName = "s" + utostr(I); 1339 Variable V2(V.getType(), NewName + VariablePostfix); 1340 1341 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1342 << V.getName() << ";"; 1343 emitNewLine(); 1344 1345 V = V2; 1346 } 1347 } 1348 1349 bool Intrinsic::protoHasScalar() const { 1350 return any_of(Types, 1351 [](const Type &T) { return T.isScalar() && !T.isImmediate(); }); 1352 } 1353 1354 void Intrinsic::emitBodyAsBuiltinCall() { 1355 std::string S; 1356 1357 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1358 // sret-like argument. 1359 bool SRet = getReturnType().getNumVectors() >= 2; 1360 1361 StringRef N = Name; 1362 ClassKind LocalCK = CK; 1363 if (!protoHasScalar()) 1364 LocalCK = ClassB; 1365 1366 if (!getReturnType().isVoid() && !SRet) 1367 S += "__builtin_bit_cast(" + RetVar.getType().str() + ", "; 1368 1369 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1370 1371 if (SRet) 1372 S += "&" + RetVar.getName() + ", "; 1373 1374 for (unsigned I = 0; I < getNumParams(); ++I) { 1375 Variable &V = Variables["p" + utostr(I)]; 1376 Type T = V.getType(); 1377 1378 // Handle multiple-vector values specially, emitting each subvector as an 1379 // argument to the builtin. 1380 if (T.getNumVectors() > 1) { 1381 // Check if an explicit cast is needed. 1382 std::string Cast; 1383 if (LocalCK == ClassB) { 1384 Type T2 = T; 1385 T2.makeOneVector(); 1386 T2.makeInteger(8, /*Sign=*/true); 1387 Cast = "__builtin_bit_cast(" + T2.str() + ", "; 1388 } 1389 1390 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1391 S += Cast + V.getName() + ".val[" + utostr(J) + "]" + 1392 (Cast.empty() ? 
", " : "), "); 1393 continue; 1394 } 1395 1396 std::string Arg = V.getName(); 1397 Type CastToType = T; 1398 1399 // Check if an explicit cast is needed. 1400 if (CastToType.isVector()) { 1401 if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) { 1402 CastToType.makeInteger(8, true); 1403 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")"; 1404 } else if (LocalCK == ClassI) { 1405 if (CastToType.isInteger()) { 1406 CastToType.makeSigned(); 1407 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")"; 1408 } 1409 } 1410 } 1411 1412 S += Arg + ", "; 1413 } 1414 1415 // Extra constant integer to hold type class enum for this function, e.g. s8 1416 if (getClassKind(true) == ClassB) { 1417 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1418 } else { 1419 // Remove extraneous ", ". 1420 S.pop_back(); 1421 S.pop_back(); 1422 } 1423 1424 if (!getReturnType().isVoid() && !SRet) 1425 S += ")"; 1426 S += ");"; 1427 1428 std::string RetExpr; 1429 if (!SRet && !RetVar.getType().isVoid()) 1430 RetExpr = RetVar.getName() + " = "; 1431 1432 OS << " " << RetExpr << S; 1433 emitNewLine(); 1434 } 1435 1436 void Intrinsic::emitBody(StringRef CallPrefix) { 1437 std::vector<std::string> Lines; 1438 1439 if (!Body || Body->empty()) { 1440 // Nothing specific to output - must output a builtin. 1441 emitBodyAsBuiltinCall(); 1442 return; 1443 } 1444 1445 // We have a list of "things to output". The last should be returned. 
1446 for (auto *I : Body->getElements()) { 1447 if (const auto *SI = dyn_cast<StringInit>(I)) { 1448 Lines.push_back(replaceParamsIn(SI->getAsString())); 1449 } else if (const auto *DI = dyn_cast<DagInit>(I)) { 1450 DagEmitter DE(*this, CallPrefix); 1451 Lines.push_back(DE.emitDag(DI).second + ";"); 1452 } 1453 } 1454 1455 assert(!Lines.empty() && "Empty def?"); 1456 if (!RetVar.getType().isVoid()) 1457 Lines.back().insert(0, RetVar.getName() + " = "); 1458 1459 for (auto &L : Lines) { 1460 OS << " " << L; 1461 emitNewLine(); 1462 } 1463 } 1464 1465 void Intrinsic::emitReturn() { 1466 if (RetVar.getType().isVoid()) 1467 return; 1468 if (UseMacro) 1469 OS << " " << RetVar.getName() << ";"; 1470 else 1471 OS << " return " << RetVar.getName() << ";"; 1472 emitNewLine(); 1473 } 1474 1475 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) { 1476 // At this point we should only be seeing a def. 1477 const DefInit *DefI = cast<DefInit>(DI->getOperator()); 1478 std::string Op = DefI->getAsString(); 1479 1480 if (Op == "cast" || Op == "bitcast") 1481 return emitDagCast(DI, Op == "bitcast"); 1482 if (Op == "shuffle") 1483 return emitDagShuffle(DI); 1484 if (Op == "dup") 1485 return emitDagDup(DI); 1486 if (Op == "dup_typed") 1487 return emitDagDupTyped(DI); 1488 if (Op == "splat") 1489 return emitDagSplat(DI); 1490 if (Op == "save_temp") 1491 return emitDagSaveTemp(DI); 1492 if (Op == "op") 1493 return emitDagOp(DI); 1494 if (Op == "call" || Op == "call_mangled") 1495 return emitDagCall(DI, Op == "call_mangled"); 1496 if (Op == "name_replace") 1497 return emitDagNameReplace(DI); 1498 if (Op == "literal") 1499 return emitDagLiteral(DI); 1500 assert_with_loc(false, "Unknown operation!"); 1501 return std::make_pair(Type::getVoid(), ""); 1502 } 1503 1504 std::pair<Type, std::string> 1505 Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) { 1506 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1507 if (DI->getNumArgs() == 2) { 
1508 // Unary op. 1509 std::pair<Type, std::string> R = 1510 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1511 return std::make_pair(R.first, Op + R.second); 1512 } else { 1513 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1514 std::pair<Type, std::string> R1 = 1515 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1516 std::pair<Type, std::string> R2 = 1517 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1518 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1519 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1520 } 1521 } 1522 1523 std::pair<Type, std::string> 1524 Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) { 1525 std::vector<Type> Types; 1526 std::vector<std::string> Values; 1527 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1528 std::pair<Type, std::string> R = 1529 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1530 Types.push_back(R.first); 1531 Values.push_back(R.second); 1532 } 1533 1534 // Look up the called intrinsic. 1535 std::string N; 1536 if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) 1537 N = SI->getAsUnquotedString(); 1538 else 1539 N = emitDagArg(DI->getArg(0), "").second; 1540 std::optional<std::string> MangledName; 1541 if (MatchMangledName) { 1542 if (Intr.getRecord()->getValueAsString("Name").contains("laneq")) 1543 N += "q"; 1544 MangledName = Intr.mangleName(N, ClassS); 1545 } 1546 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1547 1548 // Make sure the callee is known as an early def. 1549 Callee.setNeededEarly(); 1550 Intr.Dependencies.insert(&Callee); 1551 1552 // Now create the call itself. 
1553 std::string S; 1554 if (!Callee.isBigEndianSafe()) 1555 S += CallPrefix.str(); 1556 S += Callee.getMangledName(true) + "("; 1557 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1558 if (I != 0) 1559 S += ", "; 1560 S += Values[I]; 1561 } 1562 S += ")"; 1563 1564 return std::make_pair(Callee.getReturnType(), S); 1565 } 1566 1567 std::pair<Type, std::string> 1568 Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) { 1569 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1570 std::pair<Type, std::string> R = 1571 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1572 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1573 Type castToType = R.first; 1574 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1575 1576 // MOD can take several forms: 1577 // 1. $X - take the type of parameter / variable X. 1578 // 2. The value "R" - take the type of the return type. 1579 // 3. a type string 1580 // 4. The value "U" or "S" to switch the signedness. 1581 // 5. The value "H" or "D" to half or double the bitwidth. 1582 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
/// Emit a __builtin_shufflevector expression for the DAG
/// (shuffle arg1, arg2, sequence).
///
/// The third operand is evaluated with a SetTheory instance extended by the
/// local operators and expander defined below; the resulting records give
/// the shuffle indices. Returns the expression together with its type, which
/// is the first argument's type with the lanes doubled or halved when the
/// mask has twice or half as many entries as that type has elements.
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
  // See the documentation in arm_neon.td for a description of these operators.

  // "lowhalf": evaluates its operands and keeps the first half of the result.
  class LowHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };

  // "highhalf": evaluates its operands and keeps the second half of the
  // result.
  class HighHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };

  // "rev": the first operand is an integer; the remaining operands are
  // evaluated and reversed within consecutive groups.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}

    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      // Skip operand 0 (the integer) when evaluating the element list.
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      // Group length = integer operand / ElementSize.
      // NOTE(review): presumably the operand is a width in bits, matching the
      // element size in bits passed to the constructor below - confirm
      // against arm_neon.td.
      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      // Walk the elements group by group, emitting each group back to front.
      std::vector<const Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert_range(Revved);
    }
  };

  // Expands the records named "mask0" and "mask1" into N "sv<index>" defs:
  // indices 0..N-1 for mask0 and N..2N-1 for mask1. Other records are left
  // unexpanded.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}

    void expand(SetTheory &ST, const Record *R,
                SetTheory::RecSet &Elts) override {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the sequence operand; Rev and MaskExpander are parameterised
  // with the first argument's element size and element count.
  SetTheory ST;
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Elts, {});

  // Each resulting record must be named "sv<index>"; the text after the "sv"
  // prefix becomes one shuffle index.
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.starts_with("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
1764 if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1765 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1766 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1767 } else 1768 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1769 1770 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1771 std::string S = "(" + T.str() + ") {"; 1772 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1773 if (I != 0) 1774 S += ", "; 1775 S += B.second; 1776 } 1777 S += "}"; 1778 1779 return std::make_pair(T, S); 1780 } 1781 1782 std::pair<Type, std::string> 1783 Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) { 1784 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1785 std::pair<Type, std::string> A = 1786 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1787 std::pair<Type, std::string> B = 1788 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1789 1790 assert_with_loc(B.first.isScalar(), 1791 "splat() requires a scalar int as the second argument"); 1792 1793 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1794 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1795 S += ", " + B.second; 1796 } 1797 S += ")"; 1798 1799 return std::make_pair(Intr.getBaseType(), S); 1800 } 1801 1802 std::pair<Type, std::string> 1803 Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) { 1804 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1805 std::pair<Type, std::string> A = 1806 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1807 1808 assert_with_loc(!A.first.isVoid(), 1809 "Argument to save_temp() must have non-void type!"); 1810 1811 std::string N = std::string(DI->getArgNameStr(0)); 1812 assert_with_loc(!N.empty(), 1813 "save_temp() expects a name as the first argument"); 1814 1815 auto [It, Inserted] = 1816 Intr.Variables.try_emplace(N, A.first, N + Intr.VariablePostfix); 1817 
assert_with_loc(Inserted, "Variable already defined!"); 1818 1819 std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second; 1820 1821 return std::make_pair(Type::getVoid(), S); 1822 } 1823 1824 std::pair<Type, std::string> 1825 Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) { 1826 std::string S = Intr.Name; 1827 1828 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1829 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1830 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1831 1832 size_t Idx = S.find(ToReplace); 1833 1834 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1835 S.replace(Idx, ToReplace.size(), ReplaceWith); 1836 1837 return std::make_pair(Type::getVoid(), S); 1838 } 1839 1840 std::pair<Type, std::string> 1841 Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) { 1842 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1843 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1844 return std::make_pair(Type::fromTypedefName(Ty), Value); 1845 } 1846 1847 std::pair<Type, std::string> 1848 Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) { 1849 if (!ArgName.empty()) { 1850 assert_with_loc(!Arg->isComplete(), 1851 "Arguments must either be DAGs or names, not both!"); 1852 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1853 "Variable not defined!"); 1854 Variable &V = Intr.Variables[ArgName]; 1855 return std::make_pair(V.getType(), V.getName()); 1856 } 1857 1858 assert(Arg && "Neither ArgName nor Arg?!"); 1859 const auto *DI = dyn_cast<DagInit>(Arg); 1860 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1861 1862 return emitDag(DI); 1863 } 1864 1865 std::string Intrinsic::generate() { 1866 // Avoid duplicated code for big and little endian 1867 if (isBigEndianSafe()) { 1868 
generateImpl(false, "", ""); 1869 return OS.str(); 1870 } 1871 // Little endian intrinsics are simple and don't require any argument 1872 // swapping. 1873 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1874 1875 generateImpl(false, "", ""); 1876 1877 OS << "#else\n"; 1878 1879 // Big endian intrinsics are more complex. The user intended these intrinsics 1880 // to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for 1881 // 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but 1882 // we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap 1883 // all arguments and swap the return value too. 1884 // 1885 // If we call sub-intrinsics, we should call a version that does 1886 // not re-swap the arguments! 1887 generateImpl(true, "", "__noswap_"); 1888 1889 // If we're needed early, create a non-swapping variant for 1890 // big-endian. 1891 if (NeededEarly) { 1892 generateImpl(false, "__noswap_", "__noswap_"); 1893 } 1894 OS << "#endif\n\n"; 1895 1896 return OS.str(); 1897 } 1898 1899 void Intrinsic::generateImpl(bool ReverseArguments, 1900 StringRef NamePrefix, StringRef CallPrefix) { 1901 CurrentRecord = R; 1902 1903 // If we call a macro, our local variables may be corrupted due to 1904 // lack of proper lexical scoping. So, add a globally unique postfix 1905 // to every variable. 1906 // 1907 // indexBody() should have set up the Dependencies set by now. 1908 for (auto *I : Dependencies) 1909 if (I->UseMacro) { 1910 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1911 break; 1912 } 1913 1914 initVariables(); 1915 1916 emitPrototype(NamePrefix); 1917 1918 if (IsUnavailable) { 1919 OS << " __attribute__((unavailable));"; 1920 } else { 1921 emitOpeningBrace(); 1922 // Emit return variable declaration first as to not trigger 1923 // -Wdeclaration-after-statement. 
1924 emitReturnVarDecl(); 1925 emitShadowedArgs(); 1926 if (ReverseArguments) 1927 emitArgumentReversal(); 1928 emitBody(CallPrefix); 1929 if (ReverseArguments) 1930 emitReturnReversal(); 1931 emitReturn(); 1932 emitClosingBrace(); 1933 } 1934 OS << "\n"; 1935 1936 CurrentRecord = nullptr; 1937 } 1938 1939 void Intrinsic::indexBody() { 1940 CurrentRecord = R; 1941 1942 initVariables(); 1943 // Emit return variable declaration first as to not trigger 1944 // -Wdeclaration-after-statement. 1945 emitReturnVarDecl(); 1946 emitBody(""); 1947 OS.str(""); 1948 1949 CurrentRecord = nullptr; 1950 } 1951 1952 //===----------------------------------------------------------------------===// 1953 // NeonEmitter implementation 1954 //===----------------------------------------------------------------------===// 1955 1956 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1957 std::optional<std::string> MangledName) { 1958 // First, look up the name in the intrinsic map. 1959 assert_with_loc(IntrinsicMap.find(Name) != IntrinsicMap.end(), 1960 ("Intrinsic '" + Name + "' not found!").str()); 1961 auto &V = IntrinsicMap.find(Name)->second; 1962 std::vector<Intrinsic *> GoodVec; 1963 1964 // Create a string to print if we end up failing. 1965 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1966 for (unsigned I = 0; I < Types.size(); ++I) { 1967 if (I != 0) 1968 ErrMsg += ", "; 1969 ErrMsg += Types[I].str(); 1970 } 1971 ErrMsg += ")'\n"; 1972 ErrMsg += "Available overloads:\n"; 1973 1974 // Now, look through each intrinsic implementation and see if the types are 1975 // compatible. 
1976 for (auto &I : V) { 1977 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1978 ErrMsg += "("; 1979 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1980 if (A != 0) 1981 ErrMsg += ", "; 1982 ErrMsg += I.getParamType(A).str(); 1983 } 1984 ErrMsg += ")\n"; 1985 1986 if (MangledName && MangledName != I.getMangledName(true)) 1987 continue; 1988 1989 if (I.getNumParams() != Types.size()) 1990 continue; 1991 1992 unsigned ArgNum = 0; 1993 bool MatchingArgumentTypes = all_of(Types, [&](const auto &Type) { 1994 return Type == I.getParamType(ArgNum++); 1995 }); 1996 1997 if (MatchingArgumentTypes) 1998 GoodVec.push_back(&I); 1999 } 2000 2001 assert_with_loc(!GoodVec.empty(), 2002 "No compatible intrinsic found - " + ErrMsg); 2003 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 2004 2005 return *GoodVec.front(); 2006 } 2007 2008 void NeonEmitter::createIntrinsic(const Record *R, 2009 SmallVectorImpl<Intrinsic *> &Out) { 2010 std::string Name = std::string(R->getValueAsString("Name")); 2011 std::string Proto = std::string(R->getValueAsString("Prototype")); 2012 std::string Types = std::string(R->getValueAsString("Types")); 2013 const Record *OperationRec = R->getValueAsDef("Operation"); 2014 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 2015 std::string ArchGuard = std::string(R->getValueAsString("ArchGuard")); 2016 std::string TargetGuard = std::string(R->getValueAsString("TargetGuard")); 2017 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 2018 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 2019 2020 // Set the global current record. This allows assert_with_loc to produce 2021 // decent location information even when highly nested. 
2022 CurrentRecord = R; 2023 2024 const ListInit *Body = OperationRec->getValueAsListInit("Ops"); 2025 2026 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 2027 2028 ClassKind CK = ClassNone; 2029 if (!R->getDirectSuperClasses().empty()) 2030 CK = ClassMap[R->getDirectSuperClasses()[0].first]; 2031 2032 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 2033 if (!CartesianProductWith.empty()) { 2034 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 2035 for (auto TS : TypeSpecs) { 2036 Type DefaultT(TS, "."); 2037 for (auto SrcTS : ProductTypeSpecs) { 2038 Type DefaultSrcT(SrcTS, "."); 2039 if (TS == SrcTS || 2040 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 2041 continue; 2042 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 2043 } 2044 } 2045 } else { 2046 for (auto TS : TypeSpecs) { 2047 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 2048 } 2049 } 2050 2051 sort(NewTypeSpecs); 2052 NewTypeSpecs.erase(llvm::unique(NewTypeSpecs), NewTypeSpecs.end()); 2053 auto &Entry = IntrinsicMap[Name]; 2054 2055 for (auto &I : NewTypeSpecs) { 2056 2057 // MFloat8 type is only available on AArch64. If encountered set ArchGuard 2058 // correctly. 2059 std::string NewArchGuard = ArchGuard; 2060 if (Type(I.first, ".").isMFloat8()) { 2061 if (NewArchGuard.empty()) { 2062 NewArchGuard = "defined(__aarch64__)"; 2063 } else if (NewArchGuard.find("defined(__aarch64__)") == 2064 std::string::npos) { 2065 NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")"; 2066 } 2067 } 2068 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 2069 NewArchGuard, TargetGuard, IsUnavailable, BigEndianSafe); 2070 Out.push_back(&Entry.back()); 2071 } 2072 2073 CurrentRecord = nullptr; 2074 } 2075 2076 /// genBuiltinsDef: Generate the builtin infos, checking for unique builtin 2077 /// declarations. 
2078 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2079 SmallVectorImpl<Intrinsic *> &Defs) { 2080 // We only want to emit a builtin once, and in order of its name. 2081 std::map<std::string, Intrinsic *> Builtins; 2082 2083 llvm::StringToOffsetTable Table; 2084 Table.GetOrAddStringOffset(""); 2085 Table.GetOrAddStringOffset("n"); 2086 2087 for (auto *Def : Defs) { 2088 if (Def->hasBody()) 2089 continue; 2090 2091 if (Builtins.insert({Def->getMangledName(), Def}).second) { 2092 Table.GetOrAddStringOffset(Def->getMangledName()); 2093 Table.GetOrAddStringOffset(Def->getBuiltinTypeStr()); 2094 Table.GetOrAddStringOffset(Def->getTargetGuard()); 2095 } 2096 } 2097 2098 OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n"; 2099 for (const auto &[Name, Def] : Builtins) { 2100 OS << " BI__builtin_neon_" << Name << ",\n"; 2101 } 2102 OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n"; 2103 2104 OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n"; 2105 Table.EmitStringTableDef(OS, "BuiltinStrings"); 2106 OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n"; 2107 2108 OS << "#ifdef GET_NEON_BUILTIN_INFOS\n"; 2109 for (const auto &[Name, Def] : Builtins) { 2110 OS << " Builtin::Info{Builtin::Info::StrOffsets{" 2111 << Table.GetStringOffset(Def->getMangledName()) << " /* " 2112 << Def->getMangledName() << " */, "; 2113 OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* " 2114 << Def->getBuiltinTypeStr() << " */, "; 2115 OS << Table.GetStringOffset("n") << " /* n */, "; 2116 OS << Table.GetStringOffset(Def->getTargetGuard()) << " /* " 2117 << Def->getTargetGuard() << " */}, "; 2118 OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n"; 2119 } 2120 OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n"; 2121 } 2122 2123 void NeonEmitter::genStreamingSVECompatibleList( 2124 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) { 2125 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n"; 2126 2127 std::set<std::string> Emitted; 2128 for (auto *Def : Defs) { 2129 // If the def has a body (that is, it has 
Operation DAGs), it won't call 2130 // __builtin_neon_* so we don't need to generate a definition for it. 2131 if (Def->hasBody()) 2132 continue; 2133 2134 std::string Name = Def->getMangledName(); 2135 if (Emitted.find(Name) != Emitted.end()) 2136 continue; 2137 2138 // FIXME: We should make exceptions here for some NEON builtins that are 2139 // permitted in streaming mode. 2140 OS << "case NEON::BI__builtin_neon_" << Name 2141 << ": BuiltinType = ArmNonStreaming; break;\n"; 2142 Emitted.insert(Name); 2143 } 2144 OS << "#endif\n\n"; 2145 } 2146 2147 /// Generate the ARM and AArch64 overloaded type checking code for 2148 /// SemaChecking.cpp, checking for unique builtin declarations. 2149 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2150 SmallVectorImpl<Intrinsic *> &Defs) { 2151 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2152 2153 // We record each overload check line before emitting because subsequent Inst 2154 // definitions may extend the number of permitted types (i.e. augment the 2155 // Mask). Use std::map to avoid sorting the table by hash number. 2156 struct OverloadInfo { 2157 uint64_t Mask = 0ULL; 2158 int PtrArgNum = 0; 2159 bool HasConstPtr = false; 2160 OverloadInfo() = default; 2161 }; 2162 std::map<std::string, OverloadInfo> OverloadMap; 2163 2164 for (auto *Def : Defs) { 2165 // If the def has a body (that is, it has Operation DAGs), it won't call 2166 // __builtin_neon_* so we don't need to generate a definition for it. 2167 if (Def->hasBody()) 2168 continue; 2169 // Functions which have a scalar argument cannot be overloaded, no need to 2170 // check them if we are emitting the type checking code. 2171 if (Def->protoHasScalar()) 2172 continue; 2173 2174 uint64_t Mask = 0ULL; 2175 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2176 2177 // Check if the function has a pointer or const pointer argument. 
2178 int PtrArgNum = -1; 2179 bool HasConstPtr = false; 2180 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2181 const auto &Type = Def->getParamType(I); 2182 if (Type.isPointer()) { 2183 PtrArgNum = I; 2184 HasConstPtr = Type.isConstPointer(); 2185 } 2186 } 2187 2188 // For sret builtins, adjust the pointer argument index. 2189 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2190 PtrArgNum += 1; 2191 2192 std::string Name = Def->getName(); 2193 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2194 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to 2195 // the vector element type with one of those operations causes codegen to 2196 // select an aligned load/store instruction. If you want an unaligned 2197 // operation, the pointer argument needs to have less alignment than element 2198 // type, so just accept any pointer type. 2199 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" || 2200 Name == "vldap1_lane" || Name == "vstl1_lane") { 2201 PtrArgNum = -1; 2202 HasConstPtr = false; 2203 } 2204 2205 if (Mask) { 2206 OverloadInfo &OI = OverloadMap[Def->getMangledName()]; 2207 OI.Mask |= Mask; 2208 OI.PtrArgNum |= PtrArgNum; 2209 OI.HasConstPtr = HasConstPtr; 2210 } 2211 } 2212 2213 for (auto &I : OverloadMap) { 2214 OverloadInfo &OI = I.second; 2215 2216 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2217 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2218 if (OI.PtrArgNum >= 0) 2219 OS << "; PtrArgNum = " << OI.PtrArgNum; 2220 if (OI.HasConstPtr) 2221 OS << "; HasConstPtr = true"; 2222 OS << "; break;\n"; 2223 } 2224 OS << "#endif\n\n"; 2225 } 2226 2227 inline bool 2228 NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA, 2229 const ArrayRef<ImmCheck> ChecksB) { 2230 // If multiple intrinsics map to the same builtin, we must ensure that the 2231 // intended range checks performed in SemaArm.cpp do not contradict each 2232 // other, as these 
are emitted once per-buitlin. 2233 // 2234 // The arguments to be checked and type of each check to be performed must be 2235 // the same. The element types may differ as they will be resolved 2236 // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes 2237 // are not and so must be the same. 2238 bool compat = 2239 std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(), 2240 [](const auto &A, const auto &B) { 2241 return A.getImmArgIdx() == B.getImmArgIdx() && 2242 A.getKind() == B.getKind() && 2243 A.getVecSizeInBits() == B.getVecSizeInBits(); 2244 }); 2245 2246 return compat; 2247 } 2248 2249 void NeonEmitter::genIntrinsicRangeCheckCode( 2250 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) { 2251 std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted; 2252 2253 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2254 for (auto &Def : Defs) { 2255 // If the Def has a body (operation DAGs), it is not a __builtin_neon_ 2256 if (Def->hasBody() || !Def->hasImmediate()) 2257 continue; 2258 2259 // Sorted by immediate argument index 2260 ArrayRef<ImmCheck> Checks = Def->getImmChecks(); 2261 2262 auto [It, Inserted] = Emitted.try_emplace(Def->getMangledName(), Checks); 2263 if (!Inserted) { 2264 assert(areRangeChecksCompatible(Checks, It->second) && 2265 "Neon intrinsics with incompatible immediate range checks cannot " 2266 "share a builtin."); 2267 continue; // Ensure this is emitted only once 2268 } 2269 2270 // Emit builtin's range checks 2271 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n"; 2272 for (const auto &Check : Checks) { 2273 OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", " 2274 << Check.getKind() << ", " << Check.getElementSizeInBits() << ", " 2275 << Check.getVecSizeInBits() << ");\n" 2276 << " break;\n"; 2277 } 2278 } 2279 2280 OS << "#endif\n\n"; 2281 } 2282 2283 /// runHeader - Emit a file with sections defining: 2284 /// 1. 
the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2285 /// 2. the SemaChecking code for the type overload checking. 2286 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2287 void NeonEmitter::runHeader(raw_ostream &OS) { 2288 SmallVector<Intrinsic *, 128> Defs; 2289 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2290 createIntrinsic(R, Defs); 2291 2292 // Generate shared BuiltinsXXX.def 2293 genBuiltinsDef(OS, Defs); 2294 2295 // Generate ARM overloaded type checking code for SemaChecking.cpp 2296 genOverloadTypeCheckCode(OS, Defs); 2297 2298 genStreamingSVECompatibleList(OS, Defs); 2299 2300 // Generate ARM range checking code for shift/lane immediates. 2301 genIntrinsicRangeCheckCode(OS, Defs); 2302 } 2303 2304 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2305 std::string TypedefTypes(types); 2306 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2307 2308 // Emit vector typedefs. 2309 bool InIfdef = false; 2310 for (auto &TS : TDTypeVec) { 2311 bool IsA64 = false; 2312 Type T(TS, "."); 2313 if (T.isDouble() || T.isMFloat8()) 2314 IsA64 = true; 2315 2316 if (InIfdef && !IsA64) { 2317 OS << "#endif\n"; 2318 InIfdef = false; 2319 } 2320 if (!InIfdef && IsA64) { 2321 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2322 InIfdef = true; 2323 } 2324 2325 if (T.isPoly()) 2326 OS << "typedef __attribute__((neon_polyvector_type("; 2327 else 2328 OS << "typedef __attribute__((neon_vector_type("; 2329 2330 Type T2 = T; 2331 T2.makeScalar(); 2332 OS << T.getNumElements(); 2333 OS << "))) " << T2.str(); 2334 OS << " " << T.str() << ";\n"; 2335 } 2336 if (InIfdef) 2337 OS << "#endif\n"; 2338 OS << "\n"; 2339 2340 // Emit struct typedefs. 
2341 InIfdef = false; 2342 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2343 for (auto &TS : TDTypeVec) { 2344 bool IsA64 = false; 2345 Type T(TS, "."); 2346 if (T.isDouble() || T.isMFloat8()) 2347 IsA64 = true; 2348 2349 if (InIfdef && !IsA64) { 2350 OS << "#endif\n"; 2351 InIfdef = false; 2352 } 2353 if (!InIfdef && IsA64) { 2354 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2355 InIfdef = true; 2356 } 2357 2358 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2359 Type VT(TS, Mods); 2360 OS << "typedef struct " << VT.str() << " {\n"; 2361 OS << " " << T.str() << " val"; 2362 OS << "[" << NumMembers << "]"; 2363 OS << ";\n} "; 2364 OS << VT.str() << ";\n"; 2365 OS << "\n"; 2366 } 2367 } 2368 if (InIfdef) 2369 OS << "#endif\n"; 2370 } 2371 2372 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2373 /// is comprised of type definitions and function declarations. 2374 void NeonEmitter::run(raw_ostream &OS) { 2375 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2376 "------------------------------" 2377 "---===\n" 2378 " *\n" 2379 " * Permission is hereby granted, free of charge, to any person " 2380 "obtaining " 2381 "a copy\n" 2382 " * of this software and associated documentation files (the " 2383 "\"Software\")," 2384 " to deal\n" 2385 " * in the Software without restriction, including without limitation " 2386 "the " 2387 "rights\n" 2388 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2389 "and/or sell\n" 2390 " * copies of the Software, and to permit persons to whom the Software " 2391 "is\n" 2392 " * furnished to do so, subject to the following conditions:\n" 2393 " *\n" 2394 " * The above copyright notice and this permission notice shall be " 2395 "included in\n" 2396 " * all copies or substantial portions of the Software.\n" 2397 " *\n" 2398 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2399 "EXPRESS OR\n" 2400 " * IMPLIED, 
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2401 "MERCHANTABILITY,\n" 2402 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2403 "SHALL THE\n" 2404 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2405 "OTHER\n" 2406 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2407 "ARISING FROM,\n" 2408 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2409 "DEALINGS IN\n" 2410 " * THE SOFTWARE.\n" 2411 " *\n" 2412 " *===-----------------------------------------------------------------" 2413 "---" 2414 "---===\n" 2415 " */\n\n"; 2416 2417 OS << "#ifndef __ARM_NEON_H\n"; 2418 OS << "#define __ARM_NEON_H\n\n"; 2419 2420 OS << "#if !defined(__arm__) && !defined(__aarch64__) && " 2421 "!defined(__arm64ec__)\n"; 2422 OS << "#error \"<arm_neon.h> is intended only for ARM and AArch64 " 2423 "targets\"\n"; 2424 OS << "#elif !defined(__ARM_FP)\n"; 2425 OS << "#error \"NEON intrinsics not available with the soft-float ABI. " 2426 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2427 OS << "#else\n\n"; 2428 2429 OS << "#include <stdint.h>\n\n"; 2430 2431 OS << "#include <arm_bf16.h>\n"; 2432 2433 OS << "#include <arm_vector_types.h>\n"; 2434 2435 // For now, signedness of polynomial types depends on target 2436 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2437 OS << "typedef uint8_t poly8_t;\n"; 2438 OS << "typedef uint16_t poly16_t;\n"; 2439 OS << "typedef uint64_t poly64_t;\n"; 2440 OS << "typedef __uint128_t poly128_t;\n"; 2441 OS << "#else\n"; 2442 OS << "typedef int8_t poly8_t;\n"; 2443 OS << "typedef int16_t poly16_t;\n"; 2444 OS << "typedef int64_t poly64_t;\n"; 2445 OS << "#endif\n"; 2446 emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS); 2447 2448 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2449 "__nodebug__))\n\n"; 2450 2451 // Shufflevector arguments lists for endian-swapping vectors for big-endian 2452 // targets. 
For AArch64, we need to reverse every lane in the vector, but for 2453 // AArch32 we need to reverse the lanes within each 64-bit chunk of the 2454 // vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is 2455 // the length of the vector in bits, and <m> is length of each lane in bits. 2456 OS << "#if !defined(__LITTLE_ENDIAN__)\n"; 2457 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2458 OS << "#define __lane_reverse_64_32 1,0\n"; 2459 OS << "#define __lane_reverse_64_16 3,2,1,0\n"; 2460 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n"; 2461 OS << "#define __lane_reverse_128_64 1,0\n"; 2462 OS << "#define __lane_reverse_128_32 3,2,1,0\n"; 2463 OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n"; 2464 OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n"; 2465 OS << "#else\n"; 2466 OS << "#define __lane_reverse_64_32 1,0\n"; 2467 OS << "#define __lane_reverse_64_16 3,2,1,0\n"; 2468 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n"; 2469 OS << "#define __lane_reverse_128_64 0,1\n"; 2470 OS << "#define __lane_reverse_128_32 1,0,3,2\n"; 2471 OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n"; 2472 OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n"; 2473 OS << "#endif\n"; 2474 OS << "#endif\n"; 2475 2476 SmallVector<Intrinsic *, 128> Defs; 2477 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2478 createIntrinsic(R, Defs); 2479 2480 for (auto *I : Defs) 2481 I->indexBody(); 2482 2483 stable_sort(Defs, deref<std::less<>>()); 2484 2485 // Only emit a def when its requirements have been met. 2486 // FIXME: This loop could be made faster, but it's fast enough for now. 
2487 bool MadeProgress = true; 2488 std::string InGuard; 2489 while (!Defs.empty() && MadeProgress) { 2490 MadeProgress = false; 2491 2492 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2493 I != Defs.end(); /*No step*/) { 2494 bool DependenciesSatisfied = true; 2495 for (auto *II : (*I)->getDependencies()) { 2496 if (is_contained(Defs, II)) 2497 DependenciesSatisfied = false; 2498 } 2499 if (!DependenciesSatisfied) { 2500 // Try the next one. 2501 ++I; 2502 continue; 2503 } 2504 2505 // Emit #endif/#if pair if needed. 2506 if ((*I)->getArchGuard() != InGuard) { 2507 if (!InGuard.empty()) 2508 OS << "#endif\n"; 2509 InGuard = (*I)->getArchGuard(); 2510 if (!InGuard.empty()) 2511 OS << "#if " << InGuard << "\n"; 2512 } 2513 2514 // Actually generate the intrinsic code. 2515 OS << (*I)->generate(); 2516 2517 MadeProgress = true; 2518 I = Defs.erase(I); 2519 } 2520 } 2521 assert(Defs.empty() && "Some requirements were not satisfied!"); 2522 if (!InGuard.empty()) 2523 OS << "#endif\n"; 2524 2525 OS << "\n"; 2526 OS << "#undef __ai\n\n"; 2527 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2528 OS << "#endif /* ifndef __ARM_FP */\n"; 2529 } 2530 2531 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2532 /// is comprised of type definitions and function declarations. 
2533 void NeonEmitter::runFP16(raw_ostream &OS) { 2534 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2535 "------------------------------" 2536 "---===\n" 2537 " *\n" 2538 " * Permission is hereby granted, free of charge, to any person " 2539 "obtaining a copy\n" 2540 " * of this software and associated documentation files (the " 2541 "\"Software\"), to deal\n" 2542 " * in the Software without restriction, including without limitation " 2543 "the rights\n" 2544 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2545 "and/or sell\n" 2546 " * copies of the Software, and to permit persons to whom the Software " 2547 "is\n" 2548 " * furnished to do so, subject to the following conditions:\n" 2549 " *\n" 2550 " * The above copyright notice and this permission notice shall be " 2551 "included in\n" 2552 " * all copies or substantial portions of the Software.\n" 2553 " *\n" 2554 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2555 "EXPRESS OR\n" 2556 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2557 "MERCHANTABILITY,\n" 2558 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2559 "SHALL THE\n" 2560 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2561 "OTHER\n" 2562 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2563 "ARISING FROM,\n" 2564 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2565 "DEALINGS IN\n" 2566 " * THE SOFTWARE.\n" 2567 " *\n" 2568 " *===-----------------------------------------------------------------" 2569 "---" 2570 "---===\n" 2571 " */\n\n"; 2572 2573 OS << "#ifndef __ARM_FP16_H\n"; 2574 OS << "#define __ARM_FP16_H\n\n"; 2575 2576 OS << "#include <stdint.h>\n\n"; 2577 2578 OS << "typedef __fp16 float16_t;\n"; 2579 2580 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2581 "__nodebug__))\n\n"; 2582 2583 SmallVector<Intrinsic *, 128> Defs; 2584 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2585 createIntrinsic(R, Defs); 2586 2587 for (auto *I : Defs) 2588 I->indexBody(); 2589 2590 stable_sort(Defs, deref<std::less<>>()); 2591 2592 // Only emit a def when its requirements have been met. 2593 // FIXME: This loop could be made faster, but it's fast enough for now. 2594 bool MadeProgress = true; 2595 std::string InGuard; 2596 while (!Defs.empty() && MadeProgress) { 2597 MadeProgress = false; 2598 2599 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2600 I != Defs.end(); /*No step*/) { 2601 bool DependenciesSatisfied = true; 2602 for (auto *II : (*I)->getDependencies()) { 2603 if (is_contained(Defs, II)) 2604 DependenciesSatisfied = false; 2605 } 2606 if (!DependenciesSatisfied) { 2607 // Try the next one. 2608 ++I; 2609 continue; 2610 } 2611 2612 // Emit #endif/#if pair if needed. 2613 if ((*I)->getArchGuard() != InGuard) { 2614 if (!InGuard.empty()) 2615 OS << "#endif\n"; 2616 InGuard = (*I)->getArchGuard(); 2617 if (!InGuard.empty()) 2618 OS << "#if " << InGuard << "\n"; 2619 } 2620 2621 // Actually generate the intrinsic code. 
2622 OS << (*I)->generate(); 2623 2624 MadeProgress = true; 2625 I = Defs.erase(I); 2626 } 2627 } 2628 assert(Defs.empty() && "Some requirements were not satisfied!"); 2629 if (!InGuard.empty()) 2630 OS << "#endif\n"; 2631 2632 OS << "\n"; 2633 OS << "#undef __ai\n\n"; 2634 OS << "#endif /* __ARM_FP16_H */\n"; 2635 } 2636 2637 void NeonEmitter::runVectorTypes(raw_ostream &OS) { 2638 OS << "/*===---- arm_vector_types - ARM vector type " 2639 "------===\n" 2640 " *\n" 2641 " *\n" 2642 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2643 "Exceptions.\n" 2644 " * See https://llvm.org/LICENSE.txt for license information.\n" 2645 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2646 " *\n" 2647 " *===-----------------------------------------------------------------" 2648 "------===\n" 2649 " */\n\n"; 2650 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n"; 2651 OS << "#error \"This file should not be used standalone. Please include" 2652 " arm_neon.h or arm_sve.h instead\"\n\n"; 2653 OS << "#endif\n"; 2654 OS << "#ifndef __ARM_NEON_TYPES_H\n"; 2655 OS << "#define __ARM_NEON_TYPES_H\n"; 2656 OS << "typedef float float32_t;\n"; 2657 OS << "typedef __fp16 float16_t;\n"; 2658 2659 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2660 OS << "typedef __mfp8 mfloat8_t;\n"; 2661 OS << "typedef double float64_t;\n"; 2662 OS << "#endif\n\n"; 2663 2664 OS << R"( 2665 typedef uint64_t fpm_t; 2666 2667 enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 }; 2668 2669 enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE }; 2670 2671 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2672 __arm_fpm_init(void) { 2673 return 0; 2674 } 2675 2676 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2677 __arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2678 return (__fpm & ~7ull) | (fpm_t)__format; 2679 } 2680 2681 static __inline__ fpm_t 
__attribute__((__always_inline__, __nodebug__)) 2682 __arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2683 return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u); 2684 } 2685 2686 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2687 __arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2688 return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u); 2689 } 2690 2691 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2692 __arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { 2693 return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u); 2694 } 2695 2696 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2697 __arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { 2698 return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u); 2699 } 2700 2701 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2702 __arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { 2703 return (__fpm & ~0x7f0000ull) | (__scale << 16u); 2704 } 2705 2706 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2707 __arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) { 2708 return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u); 2709 } 2710 2711 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2712 __arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { 2713 return (uint32_t)__fpm | (__scale << 32u); 2714 } 2715 2716 )"; 2717 2718 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS); 2719 2720 emitNeonTypeDefs("bQb", OS); 2721 OS << "#endif // __ARM_NEON_TYPES_H\n"; 2722 } 2723 2724 void NeonEmitter::runBF16(raw_ostream &OS) { 2725 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2726 "-----------------------------------===\n" 2727 " *\n" 2728 " *\n" 2729 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2730 "Exceptions.\n" 2731 " * See 
https://llvm.org/LICENSE.txt for license information.\n" 2732 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2733 " *\n" 2734 " *===-----------------------------------------------------------------" 2735 "------===\n" 2736 " */\n\n"; 2737 2738 OS << "#ifndef __ARM_BF16_H\n"; 2739 OS << "#define __ARM_BF16_H\n\n"; 2740 2741 OS << "typedef __bf16 bfloat16_t;\n"; 2742 2743 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2744 "__nodebug__))\n\n"; 2745 2746 SmallVector<Intrinsic *, 128> Defs; 2747 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2748 createIntrinsic(R, Defs); 2749 2750 for (auto *I : Defs) 2751 I->indexBody(); 2752 2753 stable_sort(Defs, deref<std::less<>>()); 2754 2755 // Only emit a def when its requirements have been met. 2756 // FIXME: This loop could be made faster, but it's fast enough for now. 2757 bool MadeProgress = true; 2758 std::string InGuard; 2759 while (!Defs.empty() && MadeProgress) { 2760 MadeProgress = false; 2761 2762 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2763 I != Defs.end(); /*No step*/) { 2764 bool DependenciesSatisfied = true; 2765 for (auto *II : (*I)->getDependencies()) { 2766 if (is_contained(Defs, II)) 2767 DependenciesSatisfied = false; 2768 } 2769 if (!DependenciesSatisfied) { 2770 // Try the next one. 2771 ++I; 2772 continue; 2773 } 2774 2775 // Emit #endif/#if pair if needed. 2776 if ((*I)->getArchGuard() != InGuard) { 2777 if (!InGuard.empty()) 2778 OS << "#endif\n"; 2779 InGuard = (*I)->getArchGuard(); 2780 if (!InGuard.empty()) 2781 OS << "#if " << InGuard << "\n"; 2782 } 2783 2784 // Actually generate the intrinsic code. 
2785 OS << (*I)->generate(); 2786 2787 MadeProgress = true; 2788 I = Defs.erase(I); 2789 } 2790 } 2791 assert(Defs.empty() && "Some requirements were not satisfied!"); 2792 if (!InGuard.empty()) 2793 OS << "#endif\n"; 2794 2795 OS << "\n"; 2796 OS << "#undef __ai\n\n"; 2797 2798 OS << "#endif\n"; 2799 } 2800 2801 void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) { 2802 NeonEmitter(Records).run(OS); 2803 } 2804 2805 void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) { 2806 NeonEmitter(Records).runFP16(OS); 2807 } 2808 2809 void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) { 2810 NeonEmitter(Records).runBF16(OS); 2811 } 2812 2813 void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) { 2814 NeonEmitter(Records).runHeader(OS); 2815 } 2816 2817 void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) { 2818 NeonEmitter(Records).runVectorTypes(OS); 2819 } 2820 2821 void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) { 2822 llvm_unreachable("Neon test generation no longer implemented!"); 2823 } 2824