//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/TableGen/Error.h" 37 #include "llvm/TableGen/Record.h" 38 #include "llvm/TableGen/SetTheory.h" 39 #include <algorithm> 40 #include <cassert> 41 #include <cctype> 42 #include <cstddef> 43 #include <cstdint> 44 #include <deque> 45 #include <map> 46 #include <optional> 47 #include <set> 48 #include <sstream> 49 #include <string> 50 #include <utility> 51 #include <vector> 52 53 using namespace llvm; 54 55 namespace { 56 57 // While globals are generally bad, this one allows us to perform assertions 58 // liberally and somehow still trace them back to the def they indirectly 59 // came from. 60 static Record *CurrentRecord = nullptr; 61 static void assert_with_loc(bool Assertion, const std::string &Str) { 62 if (!Assertion) { 63 if (CurrentRecord) 64 PrintFatalError(CurrentRecord->getLoc(), Str); 65 else 66 PrintFatalError(Str); 67 } 68 } 69 70 enum ClassKind { 71 ClassNone, 72 ClassI, // generic integer instruction, e.g., "i8" suffix 73 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 74 ClassW, // width-specific instruction, e.g., "8" suffix 75 ClassB, // bitcast arguments with enum argument to specify type 76 ClassL, // Logical instructions which are op instructions 77 // but we need to not emit any suffix for in our 78 // tests. 79 ClassNoTest // Instructions which we do not test since they are 80 // not TRUE instructions. 81 }; 82 83 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 84 /// builtins. 
These must be kept in sync with the flags in 85 /// include/clang/Basic/TargetBuiltins.h. 86 namespace NeonTypeFlags { 87 88 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 89 90 enum EltType { 91 Int8, 92 Int16, 93 Int32, 94 Int64, 95 Poly8, 96 Poly16, 97 Poly64, 98 Poly128, 99 Float16, 100 Float32, 101 Float64, 102 BFloat16 103 }; 104 105 } // end namespace NeonTypeFlags 106 107 class NeonEmitter; 108 109 //===----------------------------------------------------------------------===// 110 // TypeSpec 111 //===----------------------------------------------------------------------===// 112 113 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 114 /// for strong typing purposes. 115 /// 116 /// A TypeSpec can be used to create a type. 117 class TypeSpec : public std::string { 118 public: 119 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 120 std::vector<TypeSpec> Ret; 121 TypeSpec Acc; 122 for (char I : Str.str()) { 123 if (islower(I)) { 124 Acc.push_back(I); 125 Ret.push_back(TypeSpec(Acc)); 126 Acc.clear(); 127 } else { 128 Acc.push_back(I); 129 } 130 } 131 return Ret; 132 } 133 }; 134 135 //===----------------------------------------------------------------------===// 136 // Type 137 //===----------------------------------------------------------------------===// 138 139 /// A Type. Not much more to say here. 140 class Type { 141 private: 142 TypeSpec TS; 143 144 enum TypeKind { 145 Void, 146 Float, 147 SInt, 148 UInt, 149 Poly, 150 BFloat16, 151 }; 152 TypeKind Kind; 153 bool Immediate, Constant, Pointer; 154 // ScalarForMangling and NoManglingQ are really not suited to live here as 155 // they are not related to the type. But they live in the TypeSpec (not the 156 // prototype), so this is really the only place to store them. 
157 bool ScalarForMangling, NoManglingQ; 158 unsigned Bitwidth, ElementBitwidth, NumVectors; 159 160 public: 161 Type() 162 : Kind(Void), Immediate(false), Constant(false), 163 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 164 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 165 166 Type(TypeSpec TS, StringRef CharMods) 167 : TS(std::move(TS)), Kind(Void), Immediate(false), 168 Constant(false), Pointer(false), ScalarForMangling(false), 169 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 170 applyModifiers(CharMods); 171 } 172 173 /// Returns a type representing "void". 174 static Type getVoid() { return Type(); } 175 176 bool operator==(const Type &Other) const { return str() == Other.str(); } 177 bool operator!=(const Type &Other) const { return !operator==(Other); } 178 179 // 180 // Query functions 181 // 182 bool isScalarForMangling() const { return ScalarForMangling; } 183 bool noManglingQ() const { return NoManglingQ; } 184 185 bool isPointer() const { return Pointer; } 186 bool isValue() const { return !isVoid() && !isPointer(); } 187 bool isScalar() const { return isValue() && NumVectors == 0; } 188 bool isVector() const { return isValue() && NumVectors > 0; } 189 bool isConstPointer() const { return Constant; } 190 bool isFloating() const { return Kind == Float; } 191 bool isInteger() const { return Kind == SInt || Kind == UInt; } 192 bool isPoly() const { return Kind == Poly; } 193 bool isSigned() const { return Kind == SInt; } 194 bool isImmediate() const { return Immediate; } 195 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 196 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 197 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 198 bool isChar() const { return ElementBitwidth == 8; } 199 bool isShort() const { return isInteger() && ElementBitwidth == 16; } 200 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 201 bool isLong() 
const { return isInteger() && ElementBitwidth == 64; } 202 bool isVoid() const { return Kind == Void; } 203 bool isBFloat16() const { return Kind == BFloat16; } 204 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 205 unsigned getSizeInBits() const { return Bitwidth; } 206 unsigned getElementSizeInBits() const { return ElementBitwidth; } 207 unsigned getNumVectors() const { return NumVectors; } 208 209 // 210 // Mutator functions 211 // 212 void makeUnsigned() { 213 assert(!isVoid() && "not a potentially signed type"); 214 Kind = UInt; 215 } 216 void makeSigned() { 217 assert(!isVoid() && "not a potentially signed type"); 218 Kind = SInt; 219 } 220 221 void makeInteger(unsigned ElemWidth, bool Sign) { 222 assert(!isVoid() && "converting void to int probably not useful"); 223 Kind = Sign ? SInt : UInt; 224 Immediate = false; 225 ElementBitwidth = ElemWidth; 226 } 227 228 void makeImmediate(unsigned ElemWidth) { 229 Kind = SInt; 230 Immediate = true; 231 ElementBitwidth = ElemWidth; 232 } 233 234 void makeScalar() { 235 Bitwidth = ElementBitwidth; 236 NumVectors = 0; 237 } 238 239 void makeOneVector() { 240 assert(isVector()); 241 NumVectors = 1; 242 } 243 244 void make32BitElement() { 245 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 246 ElementBitwidth = 32; 247 } 248 249 void doubleLanes() { 250 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 251 Bitwidth = 128; 252 } 253 254 void halveLanes() { 255 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 256 Bitwidth = 64; 257 } 258 259 /// Return the C string representation of a type, which is the typename 260 /// defined in stdint.h or arm_neon.h. 261 std::string str() const; 262 263 /// Return the string representation of a type, which is an encoded 264 /// string for passing to the BUILTIN() macro in Builtins.def. 265 std::string builtin_str() const; 266 267 /// Return the value in NeonTypeFlags for this type. 
268 unsigned getNeonEnum() const; 269 270 /// Parse a type from a stdint.h or arm_neon.h typedef name, 271 /// for example uint32x2_t or int64_t. 272 static Type fromTypedefName(StringRef Name); 273 274 private: 275 /// Creates the type based on the typespec string in TS. 276 /// Sets "Quad" to true if the "Q" or "H" modifiers were 277 /// seen. This is needed by applyModifier as some modifiers 278 /// only take effect if the type size was changed by "Q" or "H". 279 void applyTypespec(bool &Quad); 280 /// Applies prototype modifiers to the type. 281 void applyModifiers(StringRef Mods); 282 }; 283 284 //===----------------------------------------------------------------------===// 285 // Variable 286 //===----------------------------------------------------------------------===// 287 288 /// A variable is a simple class that just has a type and a name. 289 class Variable { 290 Type T; 291 std::string N; 292 293 public: 294 Variable() : T(Type::getVoid()) {} 295 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 296 297 Type getType() const { return T; } 298 std::string getName() const { return "__" + N; } 299 }; 300 301 //===----------------------------------------------------------------------===// 302 // Intrinsic 303 //===----------------------------------------------------------------------===// 304 305 /// The main grunt class. This represents an instantiation of an intrinsic with 306 /// a particular typespec and prototype. 307 class Intrinsic { 308 /// The Record this intrinsic was created from. 309 Record *R; 310 /// The unmangled name. 311 std::string Name; 312 /// The input and output typespecs. InTS == OutTS except when 313 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 314 TypeSpec OutTS, InTS; 315 /// The base class kind. Most intrinsics use ClassS, which has full type 316 /// info for integers (s32/u32). 
Some use ClassI, which doesn't care about 317 /// signedness (i32), while some (ClassB) have no type at all, only a width 318 /// (32). 319 ClassKind CK; 320 /// The list of DAGs for the body. May be empty, in which case we should 321 /// emit a builtin call. 322 ListInit *Body; 323 /// The architectural ifdef guard. 324 std::string ArchGuard; 325 /// The architectural target() guard. 326 std::string TargetGuard; 327 /// Set if the Unavailable bit is 1. This means we don't generate a body, 328 /// just an "unavailable" attribute on a declaration. 329 bool IsUnavailable; 330 /// Is this intrinsic safe for big-endian? or does it need its arguments 331 /// reversing? 332 bool BigEndianSafe; 333 334 /// The types of return value [0] and parameters [1..]. 335 std::vector<Type> Types; 336 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 337 int PolymorphicKeyType; 338 /// The local variables defined. 339 std::map<std::string, Variable> Variables; 340 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 341 bool NeededEarly; 342 /// UseMacro - set if we should implement using a macro or unset for a 343 /// function. 344 bool UseMacro; 345 /// The set of intrinsics that this intrinsic uses/requires. 346 std::set<Intrinsic *> Dependencies; 347 /// The "base type", which is Type('d', OutTS). InBaseType is only 348 /// different if CartesianProductWith is non-empty (for vreinterpret). 349 Type BaseType, InBaseType; 350 /// The return variable. 351 Variable RetVar; 352 /// A postfix to apply to every variable. Defaults to "". 
353 std::string VariablePostfix; 354 355 NeonEmitter &Emitter; 356 std::stringstream OS; 357 358 bool isBigEndianSafe() const { 359 if (BigEndianSafe) 360 return true; 361 362 for (const auto &T : Types){ 363 if (T.isVector() && T.getNumElements() > 1) 364 return false; 365 } 366 return true; 367 } 368 369 public: 370 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 371 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 372 StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe) 373 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 374 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable), 375 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 376 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 377 Emitter(Emitter) { 378 // Modify the TypeSpec per-argument to get a concrete Type, and create 379 // known variables for each. 380 // Types[0] is the return value. 381 unsigned Pos = 0; 382 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 383 StringRef Mods = getNextModifiers(Proto, Pos); 384 while (!Mods.empty()) { 385 Types.emplace_back(InTS, Mods); 386 if (Mods.contains('!')) 387 PolymorphicKeyType = Types.size() - 1; 388 389 Mods = getNextModifiers(Proto, Pos); 390 } 391 392 for (const auto &Type : Types) { 393 // If this builtin takes an immediate argument, we need to #define it rather 394 // than use a standard declaration, so that SemaChecking can range check 395 // the immediate passed by the user. 396 397 // Pointer arguments need to use macros to avoid hiding aligned attributes 398 // from the pointer type. 399 400 // It is not permitted to pass or return an __fp16 by value, so intrinsics 401 // taking a scalar float16_t must be implemented as macros. 
402 if (Type.isImmediate() || Type.isPointer() || 403 (Type.isScalar() && Type.isHalf())) 404 UseMacro = true; 405 } 406 } 407 408 /// Get the Record that this intrinsic is based off. 409 Record *getRecord() const { return R; } 410 /// Get the set of Intrinsics that this intrinsic calls. 411 /// this is the set of immediate dependencies, NOT the 412 /// transitive closure. 413 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 414 /// Get the architectural guard string (#ifdef). 415 std::string getArchGuard() const { return ArchGuard; } 416 std::string getTargetGuard() const { return TargetGuard; } 417 /// Get the non-mangled name. 418 std::string getName() const { return Name; } 419 420 /// Return true if the intrinsic takes an immediate operand. 421 bool hasImmediate() const { 422 return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); }); 423 } 424 425 /// Return the parameter index of the immediate operand. 426 unsigned getImmediateIdx() const { 427 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 428 if (Types[Idx].isImmediate()) 429 return Idx - 1; 430 llvm_unreachable("Intrinsic has no immediate"); 431 } 432 433 434 unsigned getNumParams() const { return Types.size() - 1; } 435 Type getReturnType() const { return Types[0]; } 436 Type getParamType(unsigned I) const { return Types[I + 1]; } 437 Type getBaseType() const { return BaseType; } 438 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 439 440 /// Return true if the prototype has a scalar argument. 441 bool protoHasScalar() const; 442 443 /// Return the index that parameter PIndex will sit at 444 /// in a generated function call. This is often just PIndex, 445 /// but may not be as things such as multiple-vector operands 446 /// and sret parameters need to be taken into account. 
447 unsigned getGeneratedParamIdx(unsigned PIndex) { 448 unsigned Idx = 0; 449 if (getReturnType().getNumVectors() > 1) 450 // Multiple vectors are passed as sret. 451 ++Idx; 452 453 for (unsigned I = 0; I < PIndex; ++I) 454 Idx += std::max(1U, getParamType(I).getNumVectors()); 455 456 return Idx; 457 } 458 459 bool hasBody() const { return Body && !Body->getValues().empty(); } 460 461 void setNeededEarly() { NeededEarly = true; } 462 463 bool operator<(const Intrinsic &Other) const { 464 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name) 465 if (ArchGuard != Other.ArchGuard) 466 return ArchGuard < Other.ArchGuard; 467 if (TargetGuard != Other.TargetGuard) 468 return TargetGuard < Other.TargetGuard; 469 return Name < Other.Name; 470 } 471 472 ClassKind getClassKind(bool UseClassBIfScalar = false) { 473 if (UseClassBIfScalar && !protoHasScalar()) 474 return ClassB; 475 return CK; 476 } 477 478 /// Return the name, mangled with type information. 479 /// If ForceClassS is true, use ClassS (u32/s32) instead 480 /// of the intrinsic's own type class. 481 std::string getMangledName(bool ForceClassS = false) const; 482 /// Return the type code for a builtin function call. 483 std::string getInstTypeCode(Type T, ClassKind CK) const; 484 /// Return the type string for a BUILTIN() macro in Builtins.def. 485 std::string getBuiltinTypeStr(); 486 487 /// Generate the intrinsic, returning code. 488 std::string generate(); 489 /// Perform type checking and populate the dependency graph, but 490 /// don't generate code yet. 
491 void indexBody(); 492 493 private: 494 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 495 496 std::string mangleName(std::string Name, ClassKind CK) const; 497 498 void initVariables(); 499 std::string replaceParamsIn(std::string S); 500 501 void emitBodyAsBuiltinCall(); 502 503 void generateImpl(bool ReverseArguments, 504 StringRef NamePrefix, StringRef CallPrefix); 505 void emitReturn(); 506 void emitBody(StringRef CallPrefix); 507 void emitShadowedArgs(); 508 void emitArgumentReversal(); 509 void emitReturnVarDecl(); 510 void emitReturnReversal(); 511 void emitReverseVariable(Variable &Dest, Variable &Src); 512 void emitNewLine(); 513 void emitClosingBrace(); 514 void emitOpeningBrace(); 515 void emitPrototype(StringRef NamePrefix); 516 517 class DagEmitter { 518 Intrinsic &Intr; 519 StringRef CallPrefix; 520 521 public: 522 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 523 Intr(Intr), CallPrefix(CallPrefix) { 524 } 525 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 526 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 527 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 528 std::pair<Type, std::string> emitDagDup(DagInit *DI); 529 std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); 530 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 531 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 532 std::pair<Type, std::string> emitDagCall(DagInit *DI, 533 bool MatchMangledName); 534 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 535 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 536 std::pair<Type, std::string> emitDagOp(DagInit *DI); 537 std::pair<Type, std::string> emitDag(DagInit *DI); 538 }; 539 }; 540 541 //===----------------------------------------------------------------------===// 542 // NeonEmitter 543 //===----------------------------------------------------------------------===// 544 545 class NeonEmitter { 546 RecordKeeper 
&Records; 547 DenseMap<Record *, ClassKind> ClassMap; 548 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 549 unsigned UniqueNumber; 550 551 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 552 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 553 void genStreamingSVECompatibleList(raw_ostream &OS, 554 SmallVectorImpl<Intrinsic *> &Defs); 555 void genOverloadTypeCheckCode(raw_ostream &OS, 556 SmallVectorImpl<Intrinsic *> &Defs); 557 void genIntrinsicRangeCheckCode(raw_ostream &OS, 558 SmallVectorImpl<Intrinsic *> &Defs); 559 560 public: 561 /// Called by Intrinsic - this attempts to get an intrinsic that takes 562 /// the given types as arguments. 563 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 564 std::optional<std::string> MangledName); 565 566 /// Called by Intrinsic - returns a globally-unique number. 567 unsigned getUniqueNumber() { return UniqueNumber++; } 568 569 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 570 Record *SI = R.getClass("SInst"); 571 Record *II = R.getClass("IInst"); 572 Record *WI = R.getClass("WInst"); 573 Record *SOpI = R.getClass("SOpInst"); 574 Record *IOpI = R.getClass("IOpInst"); 575 Record *WOpI = R.getClass("WOpInst"); 576 Record *LOpI = R.getClass("LOpInst"); 577 Record *NoTestOpI = R.getClass("NoTestOpInst"); 578 579 ClassMap[SI] = ClassS; 580 ClassMap[II] = ClassI; 581 ClassMap[WI] = ClassW; 582 ClassMap[SOpI] = ClassS; 583 ClassMap[IOpI] = ClassI; 584 ClassMap[WOpI] = ClassW; 585 ClassMap[LOpI] = ClassL; 586 ClassMap[NoTestOpI] = ClassNoTest; 587 } 588 589 // Emit arm_neon.h.inc 590 void run(raw_ostream &o); 591 592 // Emit arm_fp16.h.inc 593 void runFP16(raw_ostream &o); 594 595 // Emit arm_bf16.h.inc 596 void runBF16(raw_ostream &o); 597 598 void runVectorTypes(raw_ostream &o); 599 600 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 601 // arm_bf16.h 602 void runHeader(raw_ostream &o); 603 }; 604 605 } // end anonymous 
namespace 606 607 //===----------------------------------------------------------------------===// 608 // Type implementation 609 //===----------------------------------------------------------------------===// 610 611 std::string Type::str() const { 612 if (isVoid()) 613 return "void"; 614 std::string S; 615 616 if (isInteger() && !isSigned()) 617 S += "u"; 618 619 if (isPoly()) 620 S += "poly"; 621 else if (isFloating()) 622 S += "float"; 623 else if (isBFloat16()) 624 S += "bfloat"; 625 else 626 S += "int"; 627 628 S += utostr(ElementBitwidth); 629 if (isVector()) 630 S += "x" + utostr(getNumElements()); 631 if (NumVectors > 1) 632 S += "x" + utostr(NumVectors); 633 S += "_t"; 634 635 if (Constant) 636 S += " const"; 637 if (Pointer) 638 S += " *"; 639 640 return S; 641 } 642 643 std::string Type::builtin_str() const { 644 std::string S; 645 if (isVoid()) 646 return "v"; 647 648 if (isPointer()) { 649 // All pointers are void pointers. 650 S = "v"; 651 if (isConstPointer()) 652 S += "C"; 653 S += "*"; 654 return S; 655 } else if (isInteger()) 656 switch (ElementBitwidth) { 657 case 8: S += "c"; break; 658 case 16: S += "s"; break; 659 case 32: S += "i"; break; 660 case 64: S += "Wi"; break; 661 case 128: S += "LLLi"; break; 662 default: llvm_unreachable("Unhandled case!"); 663 } 664 else if (isBFloat16()) { 665 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 666 S += "y"; 667 } else 668 switch (ElementBitwidth) { 669 case 16: S += "h"; break; 670 case 32: S += "f"; break; 671 case 64: S += "d"; break; 672 default: llvm_unreachable("Unhandled case!"); 673 } 674 675 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 676 if (isChar() && !isPointer() && isSigned()) 677 // Make chars explicitly signed. 678 S = "S" + S; 679 else if (isInteger() && !isSigned()) 680 S = "U" + S; 681 682 // Constant indices are "int", but have the "constant expression" modifier. 
683 if (isImmediate()) { 684 assert(isInteger() && isSigned()); 685 S = "I" + S; 686 } 687 688 if (isScalar()) 689 return S; 690 691 std::string Ret; 692 for (unsigned I = 0; I < NumVectors; ++I) 693 Ret += "V" + utostr(getNumElements()) + S; 694 695 return Ret; 696 } 697 698 unsigned Type::getNeonEnum() const { 699 unsigned Addend; 700 switch (ElementBitwidth) { 701 case 8: Addend = 0; break; 702 case 16: Addend = 1; break; 703 case 32: Addend = 2; break; 704 case 64: Addend = 3; break; 705 case 128: Addend = 4; break; 706 default: llvm_unreachable("Unhandled element bitwidth!"); 707 } 708 709 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 710 if (isPoly()) { 711 // Adjustment needed because Poly32 doesn't exist. 712 if (Addend >= 2) 713 --Addend; 714 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 715 } 716 if (isFloating()) { 717 assert(Addend != 0 && "Float8 doesn't exist!"); 718 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 719 } 720 721 if (isBFloat16()) { 722 assert(Addend == 1 && "BFloat16 is only 16 bit"); 723 Base = (unsigned)NeonTypeFlags::BFloat16; 724 } 725 726 if (Bitwidth == 128) 727 Base |= (unsigned)NeonTypeFlags::QuadFlag; 728 if (isInteger() && !isSigned()) 729 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 730 731 return Base; 732 } 733 734 Type Type::fromTypedefName(StringRef Name) { 735 Type T; 736 T.Kind = SInt; 737 738 if (Name.front() == 'u') { 739 T.Kind = UInt; 740 Name = Name.drop_front(); 741 } 742 743 if (Name.starts_with("float")) { 744 T.Kind = Float; 745 Name = Name.drop_front(5); 746 } else if (Name.starts_with("poly")) { 747 T.Kind = Poly; 748 Name = Name.drop_front(4); 749 } else if (Name.starts_with("bfloat")) { 750 T.Kind = BFloat16; 751 Name = Name.drop_front(6); 752 } else { 753 assert(Name.starts_with("int")); 754 Name = Name.drop_front(3); 755 } 756 757 unsigned I = 0; 758 for (I = 0; I < Name.size(); ++I) { 759 if (!isdigit(Name[I])) 760 break; 761 } 762 Name.substr(0, I).getAsInteger(10, 
T.ElementBitwidth); 763 Name = Name.drop_front(I); 764 765 T.Bitwidth = T.ElementBitwidth; 766 T.NumVectors = 1; 767 768 if (Name.front() == 'x') { 769 Name = Name.drop_front(); 770 unsigned I = 0; 771 for (I = 0; I < Name.size(); ++I) { 772 if (!isdigit(Name[I])) 773 break; 774 } 775 unsigned NumLanes; 776 Name.substr(0, I).getAsInteger(10, NumLanes); 777 Name = Name.drop_front(I); 778 T.Bitwidth = T.ElementBitwidth * NumLanes; 779 } else { 780 // Was scalar. 781 T.NumVectors = 0; 782 } 783 if (Name.front() == 'x') { 784 Name = Name.drop_front(); 785 unsigned I = 0; 786 for (I = 0; I < Name.size(); ++I) { 787 if (!isdigit(Name[I])) 788 break; 789 } 790 Name.substr(0, I).getAsInteger(10, T.NumVectors); 791 Name = Name.drop_front(I); 792 } 793 794 assert(Name.starts_with("_t") && "Malformed typedef!"); 795 return T; 796 } 797 798 void Type::applyTypespec(bool &Quad) { 799 std::string S = TS; 800 ScalarForMangling = false; 801 Kind = SInt; 802 ElementBitwidth = ~0U; 803 NumVectors = 1; 804 805 for (char I : S) { 806 switch (I) { 807 case 'S': 808 ScalarForMangling = true; 809 break; 810 case 'H': 811 NoManglingQ = true; 812 Quad = true; 813 break; 814 case 'Q': 815 Quad = true; 816 break; 817 case 'P': 818 Kind = Poly; 819 break; 820 case 'U': 821 Kind = UInt; 822 break; 823 case 'c': 824 ElementBitwidth = 8; 825 break; 826 case 'h': 827 Kind = Float; 828 [[fallthrough]]; 829 case 's': 830 ElementBitwidth = 16; 831 break; 832 case 'f': 833 Kind = Float; 834 [[fallthrough]]; 835 case 'i': 836 ElementBitwidth = 32; 837 break; 838 case 'd': 839 Kind = Float; 840 [[fallthrough]]; 841 case 'l': 842 ElementBitwidth = 64; 843 break; 844 case 'k': 845 ElementBitwidth = 128; 846 // Poly doesn't have a 128x1 type. 
847 if (isPoly()) 848 NumVectors = 0; 849 break; 850 case 'b': 851 Kind = BFloat16; 852 ElementBitwidth = 16; 853 break; 854 default: 855 llvm_unreachable("Unhandled type code!"); 856 } 857 } 858 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 859 860 Bitwidth = Quad ? 128 : 64; 861 } 862 863 void Type::applyModifiers(StringRef Mods) { 864 bool AppliedQuad = false; 865 applyTypespec(AppliedQuad); 866 867 for (char Mod : Mods) { 868 switch (Mod) { 869 case '.': 870 break; 871 case 'v': 872 Kind = Void; 873 break; 874 case 'S': 875 Kind = SInt; 876 break; 877 case 'U': 878 Kind = UInt; 879 break; 880 case 'B': 881 Kind = BFloat16; 882 ElementBitwidth = 16; 883 break; 884 case 'F': 885 Kind = Float; 886 break; 887 case 'P': 888 Kind = Poly; 889 break; 890 case '>': 891 assert(ElementBitwidth < 128); 892 ElementBitwidth *= 2; 893 break; 894 case '<': 895 assert(ElementBitwidth > 8); 896 ElementBitwidth /= 2; 897 break; 898 case '1': 899 NumVectors = 0; 900 break; 901 case '2': 902 NumVectors = 2; 903 break; 904 case '3': 905 NumVectors = 3; 906 break; 907 case '4': 908 NumVectors = 4; 909 break; 910 case '*': 911 Pointer = true; 912 break; 913 case 'c': 914 Constant = true; 915 break; 916 case 'Q': 917 Bitwidth = 128; 918 break; 919 case 'q': 920 Bitwidth = 64; 921 break; 922 case 'I': 923 Kind = SInt; 924 ElementBitwidth = Bitwidth = 32; 925 NumVectors = 0; 926 Immediate = true; 927 break; 928 case 'p': 929 if (isPoly()) 930 Kind = UInt; 931 break; 932 case '!': 933 // Key type, handled elsewhere. 
934 break; 935 default: 936 llvm_unreachable("Unhandled character!"); 937 } 938 } 939 } 940 941 //===----------------------------------------------------------------------===// 942 // Intrinsic implementation 943 //===----------------------------------------------------------------------===// 944 945 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 946 if (Proto.size() == Pos) 947 return StringRef(); 948 else if (Proto[Pos] != '(') 949 return Proto.substr(Pos++, 1); 950 951 size_t Start = Pos + 1; 952 size_t End = Proto.find(')', Start); 953 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 954 Pos = End + 1; 955 return Proto.slice(Start, End); 956 } 957 958 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 959 char typeCode = '\0'; 960 bool printNumber = true; 961 962 if (CK == ClassB && TargetGuard == "") 963 return ""; 964 965 if (T.isBFloat16()) 966 return "bf16"; 967 968 if (T.isPoly()) 969 typeCode = 'p'; 970 else if (T.isInteger()) 971 typeCode = T.isSigned() ? 's' : 'u'; 972 else 973 typeCode = 'f'; 974 975 if (CK == ClassI) { 976 switch (typeCode) { 977 default: 978 break; 979 case 's': 980 case 'u': 981 case 'p': 982 typeCode = 'i'; 983 break; 984 } 985 } 986 if (CK == ClassB && TargetGuard == "") { 987 typeCode = '\0'; 988 } 989 990 std::string S; 991 if (typeCode != '\0') 992 S.push_back(typeCode); 993 if (printNumber) 994 S += utostr(T.getElementSizeInBits()); 995 996 return S; 997 } 998 999 std::string Intrinsic::getBuiltinTypeStr() { 1000 ClassKind LocalCK = getClassKind(true); 1001 std::string S; 1002 1003 Type RetT = getReturnType(); 1004 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1005 !RetT.isFloating() && !RetT.isBFloat16()) 1006 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1007 1008 // Since the return value must be one type, return a vector type of the 1009 // appropriate width which we will bitcast. 
An exception is made for 1010 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1011 // fashion, storing them to a pointer arg. 1012 if (RetT.getNumVectors() > 1) { 1013 S += "vv*"; // void result with void* first argument 1014 } else { 1015 if (RetT.isPoly()) 1016 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1017 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1018 RetT.makeSigned(); 1019 1020 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1021 // Cast to vector of 8-bit elements. 1022 RetT.makeInteger(8, true); 1023 1024 S += RetT.builtin_str(); 1025 } 1026 1027 for (unsigned I = 0; I < getNumParams(); ++I) { 1028 Type T = getParamType(I); 1029 if (T.isPoly()) 1030 T.makeInteger(T.getElementSizeInBits(), false); 1031 1032 if (LocalCK == ClassB && !T.isScalar()) 1033 T.makeInteger(8, true); 1034 // Halves always get converted to 8-bit elements. 1035 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1036 T.makeInteger(8, true); 1037 1038 if (LocalCK == ClassI && T.isInteger()) 1039 T.makeSigned(); 1040 1041 if (hasImmediate() && getImmediateIdx() == I) 1042 T.makeImmediate(32); 1043 1044 S += T.builtin_str(); 1045 } 1046 1047 // Extra constant integer to hold type class enum for this function, e.g. s8 1048 if (LocalCK == ClassB) 1049 S += "i"; 1050 1051 return S; 1052 } 1053 1054 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1055 // Check if the prototype has a scalar operand with the type of the vector 1056 // elements. If not, bitcasting the args will take care of arg checking. 1057 // The actual signedness etc. will be taken care of with special enums. 1058 ClassKind LocalCK = CK; 1059 if (!protoHasScalar()) 1060 LocalCK = ClassB; 1061 1062 return mangleName(Name, ForceClassS ? 
ClassS : LocalCK); 1063 } 1064 1065 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1066 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1067 std::string S = Name; 1068 1069 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1070 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1071 Name == "vcvt_f32_bf16") 1072 return Name; 1073 1074 if (!typeCode.empty()) { 1075 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1076 if (Name.size() >= 3 && isdigit(Name.back()) && 1077 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1078 S.insert(S.length() - 3, "_" + typeCode); 1079 else 1080 S += "_" + typeCode; 1081 } 1082 1083 if (BaseType != InBaseType) { 1084 // A reinterpret - out the input base type at the end. 1085 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1086 } 1087 1088 if (LocalCK == ClassB && TargetGuard == "") 1089 S += "_v"; 1090 1091 // Insert a 'q' before the first '_' character so that it ends up before 1092 // _lane or _n on vector-scalar operations. 
1093 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1094 size_t Pos = S.find('_'); 1095 S.insert(Pos, "q"); 1096 } 1097 1098 char Suffix = '\0'; 1099 if (BaseType.isScalarForMangling()) { 1100 switch (BaseType.getElementSizeInBits()) { 1101 case 8: Suffix = 'b'; break; 1102 case 16: Suffix = 'h'; break; 1103 case 32: Suffix = 's'; break; 1104 case 64: Suffix = 'd'; break; 1105 default: llvm_unreachable("Bad suffix!"); 1106 } 1107 } 1108 if (Suffix != '\0') { 1109 size_t Pos = S.find('_'); 1110 S.insert(Pos, &Suffix, 1); 1111 } 1112 1113 return S; 1114 } 1115 1116 std::string Intrinsic::replaceParamsIn(std::string S) { 1117 while (S.find('$') != std::string::npos) { 1118 size_t Pos = S.find('$'); 1119 size_t End = Pos + 1; 1120 while (isalpha(S[End])) 1121 ++End; 1122 1123 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1124 assert_with_loc(Variables.find(VarName) != Variables.end(), 1125 "Variable not defined!"); 1126 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1127 } 1128 1129 return S; 1130 } 1131 1132 void Intrinsic::initVariables() { 1133 Variables.clear(); 1134 1135 // Modify the TypeSpec per-argument to get a concrete Type, and create 1136 // known variables for each. 
1137 for (unsigned I = 1; I < Types.size(); ++I) { 1138 char NameC = '0' + (I - 1); 1139 std::string Name = "p"; 1140 Name.push_back(NameC); 1141 1142 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1143 } 1144 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1145 } 1146 1147 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1148 if (UseMacro) { 1149 OS << "#define "; 1150 } else { 1151 OS << "__ai "; 1152 if (TargetGuard != "") 1153 OS << "__attribute__((target(\"" << TargetGuard << "\"))) "; 1154 OS << Types[0].str() << " "; 1155 } 1156 1157 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1158 1159 for (unsigned I = 0; I < getNumParams(); ++I) { 1160 if (I != 0) 1161 OS << ", "; 1162 1163 char NameC = '0' + I; 1164 std::string Name = "p"; 1165 Name.push_back(NameC); 1166 assert(Variables.find(Name) != Variables.end()); 1167 Variable &V = Variables[Name]; 1168 1169 if (!UseMacro) 1170 OS << V.getType().str() << " "; 1171 OS << V.getName(); 1172 } 1173 1174 OS << ")"; 1175 } 1176 1177 void Intrinsic::emitOpeningBrace() { 1178 if (UseMacro) 1179 OS << " __extension__ ({"; 1180 else 1181 OS << " {"; 1182 emitNewLine(); 1183 } 1184 1185 void Intrinsic::emitClosingBrace() { 1186 if (UseMacro) 1187 OS << "})"; 1188 else 1189 OS << "}"; 1190 } 1191 1192 void Intrinsic::emitNewLine() { 1193 if (UseMacro) 1194 OS << " \\\n"; 1195 else 1196 OS << "\n"; 1197 } 1198 1199 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1200 if (Dest.getType().getNumVectors() > 1) { 1201 emitNewLine(); 1202 1203 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1204 OS << " " << Dest.getName() << ".val[" << K << "] = " 1205 << "__builtin_shufflevector(" 1206 << Src.getName() << ".val[" << K << "], " 1207 << Src.getName() << ".val[" << K << "]"; 1208 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1209 OS << ", " << J; 1210 OS << ");"; 1211 emitNewLine(); 1212 } 1213 } else { 1214 OS << " " << Dest.getName() 1215 
<< " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1216 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1217 OS << ", " << J; 1218 OS << ");"; 1219 emitNewLine(); 1220 } 1221 } 1222 1223 void Intrinsic::emitArgumentReversal() { 1224 if (isBigEndianSafe()) 1225 return; 1226 1227 // Reverse all vector arguments. 1228 for (unsigned I = 0; I < getNumParams(); ++I) { 1229 std::string Name = "p" + utostr(I); 1230 std::string NewName = "rev" + utostr(I); 1231 1232 Variable &V = Variables[Name]; 1233 Variable NewV(V.getType(), NewName + VariablePostfix); 1234 1235 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1236 continue; 1237 1238 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1239 emitReverseVariable(NewV, V); 1240 V = NewV; 1241 } 1242 } 1243 1244 void Intrinsic::emitReturnVarDecl() { 1245 assert(RetVar.getType() == Types[0]); 1246 // Create a return variable, if we're not void. 1247 if (!RetVar.getType().isVoid()) { 1248 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1249 emitNewLine(); 1250 } 1251 } 1252 1253 void Intrinsic::emitReturnReversal() { 1254 if (isBigEndianSafe()) 1255 return; 1256 if (!getReturnType().isVector() || getReturnType().isVoid() || 1257 getReturnType().getNumElements() == 1) 1258 return; 1259 emitReverseVariable(RetVar, RetVar); 1260 } 1261 1262 void Intrinsic::emitShadowedArgs() { 1263 // Macro arguments are not type-checked like inline function arguments, 1264 // so assign them to local temporaries to get the right type checking. 1265 if (!UseMacro) 1266 return; 1267 1268 for (unsigned I = 0; I < getNumParams(); ++I) { 1269 // Do not create a temporary for an immediate argument. 1270 // That would defeat the whole point of using a macro! 1271 if (getParamType(I).isImmediate()) 1272 continue; 1273 // Do not create a temporary for pointer arguments. The input 1274 // pointer may have an alignment hint. 
1275 if (getParamType(I).isPointer()) 1276 continue; 1277 1278 std::string Name = "p" + utostr(I); 1279 1280 assert(Variables.find(Name) != Variables.end()); 1281 Variable &V = Variables[Name]; 1282 1283 std::string NewName = "s" + utostr(I); 1284 Variable V2(V.getType(), NewName + VariablePostfix); 1285 1286 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1287 << V.getName() << ";"; 1288 emitNewLine(); 1289 1290 V = V2; 1291 } 1292 } 1293 1294 bool Intrinsic::protoHasScalar() const { 1295 return llvm::any_of( 1296 Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); }); 1297 } 1298 1299 void Intrinsic::emitBodyAsBuiltinCall() { 1300 std::string S; 1301 1302 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1303 // sret-like argument. 1304 bool SRet = getReturnType().getNumVectors() >= 2; 1305 1306 StringRef N = Name; 1307 ClassKind LocalCK = CK; 1308 if (!protoHasScalar()) 1309 LocalCK = ClassB; 1310 1311 if (!getReturnType().isVoid() && !SRet) 1312 S += "(" + RetVar.getType().str() + ") "; 1313 1314 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1315 1316 if (SRet) 1317 S += "&" + RetVar.getName() + ", "; 1318 1319 for (unsigned I = 0; I < getNumParams(); ++I) { 1320 Variable &V = Variables["p" + utostr(I)]; 1321 Type T = V.getType(); 1322 1323 // Handle multiple-vector values specially, emitting each subvector as an 1324 // argument to the builtin. 1325 if (T.getNumVectors() > 1) { 1326 // Check if an explicit cast is needed. 1327 std::string Cast; 1328 if (LocalCK == ClassB) { 1329 Type T2 = T; 1330 T2.makeOneVector(); 1331 T2.makeInteger(8, /*Sign=*/true); 1332 Cast = "(" + T2.str() + ")"; 1333 } 1334 1335 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1336 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1337 continue; 1338 } 1339 1340 std::string Arg = V.getName(); 1341 Type CastToType = T; 1342 1343 // Check if an explicit cast is needed. 
1344 if (CastToType.isVector() && 1345 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1346 CastToType.makeInteger(8, true); 1347 Arg = "(" + CastToType.str() + ")" + Arg; 1348 } else if (CastToType.isVector() && LocalCK == ClassI) { 1349 if (CastToType.isInteger()) 1350 CastToType.makeSigned(); 1351 Arg = "(" + CastToType.str() + ")" + Arg; 1352 } 1353 1354 S += Arg + ", "; 1355 } 1356 1357 // Extra constant integer to hold type class enum for this function, e.g. s8 1358 if (getClassKind(true) == ClassB) { 1359 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1360 } else { 1361 // Remove extraneous ", ". 1362 S.pop_back(); 1363 S.pop_back(); 1364 } 1365 S += ");"; 1366 1367 std::string RetExpr; 1368 if (!SRet && !RetVar.getType().isVoid()) 1369 RetExpr = RetVar.getName() + " = "; 1370 1371 OS << " " << RetExpr << S; 1372 emitNewLine(); 1373 } 1374 1375 void Intrinsic::emitBody(StringRef CallPrefix) { 1376 std::vector<std::string> Lines; 1377 1378 if (!Body || Body->getValues().empty()) { 1379 // Nothing specific to output - must output a builtin. 1380 emitBodyAsBuiltinCall(); 1381 return; 1382 } 1383 1384 // We have a list of "things to output". The last should be returned. 
1385 for (auto *I : Body->getValues()) { 1386 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1387 Lines.push_back(replaceParamsIn(SI->getAsString())); 1388 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1389 DagEmitter DE(*this, CallPrefix); 1390 Lines.push_back(DE.emitDag(DI).second + ";"); 1391 } 1392 } 1393 1394 assert(!Lines.empty() && "Empty def?"); 1395 if (!RetVar.getType().isVoid()) 1396 Lines.back().insert(0, RetVar.getName() + " = "); 1397 1398 for (auto &L : Lines) { 1399 OS << " " << L; 1400 emitNewLine(); 1401 } 1402 } 1403 1404 void Intrinsic::emitReturn() { 1405 if (RetVar.getType().isVoid()) 1406 return; 1407 if (UseMacro) 1408 OS << " " << RetVar.getName() << ";"; 1409 else 1410 OS << " return " << RetVar.getName() << ";"; 1411 emitNewLine(); 1412 } 1413 1414 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1415 // At this point we should only be seeing a def. 1416 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1417 std::string Op = DefI->getAsString(); 1418 1419 if (Op == "cast" || Op == "bitcast") 1420 return emitDagCast(DI, Op == "bitcast"); 1421 if (Op == "shuffle") 1422 return emitDagShuffle(DI); 1423 if (Op == "dup") 1424 return emitDagDup(DI); 1425 if (Op == "dup_typed") 1426 return emitDagDupTyped(DI); 1427 if (Op == "splat") 1428 return emitDagSplat(DI); 1429 if (Op == "save_temp") 1430 return emitDagSaveTemp(DI); 1431 if (Op == "op") 1432 return emitDagOp(DI); 1433 if (Op == "call" || Op == "call_mangled") 1434 return emitDagCall(DI, Op == "call_mangled"); 1435 if (Op == "name_replace") 1436 return emitDagNameReplace(DI); 1437 if (Op == "literal") 1438 return emitDagLiteral(DI); 1439 assert_with_loc(false, "Unknown operation!"); 1440 return std::make_pair(Type::getVoid(), ""); 1441 } 1442 1443 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1444 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1445 if (DI->getNumArgs() == 2) { 1446 // Unary op. 
1447 std::pair<Type, std::string> R = 1448 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1449 return std::make_pair(R.first, Op + R.second); 1450 } else { 1451 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1452 std::pair<Type, std::string> R1 = 1453 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1454 std::pair<Type, std::string> R2 = 1455 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1456 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1457 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1458 } 1459 } 1460 1461 std::pair<Type, std::string> 1462 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1463 std::vector<Type> Types; 1464 std::vector<std::string> Values; 1465 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1466 std::pair<Type, std::string> R = 1467 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1468 Types.push_back(R.first); 1469 Values.push_back(R.second); 1470 } 1471 1472 // Look up the called intrinsic. 1473 std::string N; 1474 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1475 N = SI->getAsUnquotedString(); 1476 else 1477 N = emitDagArg(DI->getArg(0), "").second; 1478 std::optional<std::string> MangledName; 1479 if (MatchMangledName) { 1480 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1481 N += "q"; 1482 MangledName = Intr.mangleName(N, ClassS); 1483 } 1484 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1485 1486 // Make sure the callee is known as an early def. 1487 Callee.setNeededEarly(); 1488 Intr.Dependencies.insert(&Callee); 1489 1490 // Now create the call itself. 
1491 std::string S; 1492 if (!Callee.isBigEndianSafe()) 1493 S += CallPrefix.str(); 1494 S += Callee.getMangledName(true) + "("; 1495 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1496 if (I != 0) 1497 S += ", "; 1498 S += Values[I]; 1499 } 1500 S += ")"; 1501 1502 return std::make_pair(Callee.getReturnType(), S); 1503 } 1504 1505 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1506 bool IsBitCast){ 1507 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1508 std::pair<Type, std::string> R = 1509 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1510 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1511 Type castToType = R.first; 1512 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1513 1514 // MOD can take several forms: 1515 // 1. $X - take the type of parameter / variable X. 1516 // 2. The value "R" - take the type of the return type. 1517 // 3. a type string 1518 // 4. The value "U" or "S" to switch the signedness. 1519 // 5. The value "H" or "D" to half or double the bitwidth. 1520 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1521 if (!DI->getArgNameStr(ArgIdx).empty()) { 1522 assert_with_loc(Intr.Variables.find(std::string( 1523 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1524 "Variable not found"); 1525 castToType = 1526 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1527 } else { 1528 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1529 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1530 1531 if (SI->getAsUnquotedString() == "R") { 1532 castToType = Intr.getReturnType(); 1533 } else if (SI->getAsUnquotedString() == "U") { 1534 castToType.makeUnsigned(); 1535 } else if (SI->getAsUnquotedString() == "S") { 1536 castToType.makeSigned(); 1537 } else if (SI->getAsUnquotedString() == "H") { 1538 castToType.halveLanes(); 1539 } else if (SI->getAsUnquotedString() == "D") { 1540 castToType.doubleLanes(); 1541 } else if (SI->getAsUnquotedString() == "8") { 1542 castToType.makeInteger(8, true); 1543 } else if (SI->getAsUnquotedString() == "32") { 1544 castToType.make32BitElement(); 1545 } else { 1546 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1547 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1548 } 1549 } 1550 } 1551 1552 std::string S; 1553 if (IsBitCast) { 1554 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1555 // a temporary. 1556 std::string N = "reint"; 1557 unsigned I = 0; 1558 while (Intr.Variables.find(N) != Intr.Variables.end()) 1559 N = "reint" + utostr(++I); 1560 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1561 1562 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1563 << R.second << ";"; 1564 Intr.emitNewLine(); 1565 1566 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1567 } else { 1568 // Emit a normal (static) cast. 
1569 S = "(" + castToType.str() + ")(" + R.second + ")"; 1570 } 1571 1572 return std::make_pair(castToType, S); 1573 } 1574 1575 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1576 // See the documentation in arm_neon.td for a description of these operators. 1577 class LowHalf : public SetTheory::Operator { 1578 public: 1579 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1580 ArrayRef<SMLoc> Loc) override { 1581 SetTheory::RecSet Elts2; 1582 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1583 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1584 } 1585 }; 1586 1587 class HighHalf : public SetTheory::Operator { 1588 public: 1589 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1590 ArrayRef<SMLoc> Loc) override { 1591 SetTheory::RecSet Elts2; 1592 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1593 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1594 } 1595 }; 1596 1597 class Rev : public SetTheory::Operator { 1598 unsigned ElementSize; 1599 1600 public: 1601 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1602 1603 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1604 ArrayRef<SMLoc> Loc) override { 1605 SetTheory::RecSet Elts2; 1606 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1607 1608 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1609 VectorSize /= ElementSize; 1610 1611 std::vector<Record *> Revved; 1612 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1613 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1614 Revved.push_back(Elts2[VI + LI]); 1615 } 1616 } 1617 1618 Elts.insert(Revved.begin(), Revved.end()); 1619 } 1620 }; 1621 1622 class MaskExpander : public SetTheory::Expander { 1623 unsigned N; 1624 1625 public: 1626 MaskExpander(unsigned N) : N(N) {} 1627 1628 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1629 unsigned Addend = 0; 
1630 if (R->getName() == "mask0") 1631 Addend = 0; 1632 else if (R->getName() == "mask1") 1633 Addend = N; 1634 else 1635 return; 1636 for (unsigned I = 0; I < N; ++I) 1637 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1638 } 1639 }; 1640 1641 // (shuffle arg1, arg2, sequence) 1642 std::pair<Type, std::string> Arg1 = 1643 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1644 std::pair<Type, std::string> Arg2 = 1645 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1646 assert_with_loc(Arg1.first == Arg2.first, 1647 "Different types in arguments to shuffle!"); 1648 1649 SetTheory ST; 1650 SetTheory::RecSet Elts; 1651 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1652 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1653 ST.addOperator("rev", 1654 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1655 ST.addExpander("MaskExpand", 1656 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1657 ST.evaluate(DI->getArg(2), Elts, std::nullopt); 1658 1659 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1660 for (auto &E : Elts) { 1661 StringRef Name = E->getName(); 1662 assert_with_loc(Name.starts_with("sv"), 1663 "Incorrect element kind in shuffle mask!"); 1664 S += ", " + Name.drop_front(2).str(); 1665 } 1666 S += ")"; 1667 1668 // Recalculate the return type - the shuffle may have halved or doubled it. 
1669 Type T(Arg1.first); 1670 if (Elts.size() > T.getNumElements()) { 1671 assert_with_loc( 1672 Elts.size() == T.getNumElements() * 2, 1673 "Can only double or half the number of elements in a shuffle!"); 1674 T.doubleLanes(); 1675 } else if (Elts.size() < T.getNumElements()) { 1676 assert_with_loc( 1677 Elts.size() == T.getNumElements() / 2, 1678 "Can only double or half the number of elements in a shuffle!"); 1679 T.halveLanes(); 1680 } 1681 1682 return std::make_pair(T, S); 1683 } 1684 1685 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1686 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1687 std::pair<Type, std::string> A = 1688 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1689 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1690 1691 Type T = Intr.getBaseType(); 1692 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1693 std::string S = "(" + T.str() + ") {"; 1694 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1695 if (I != 0) 1696 S += ", "; 1697 S += A.second; 1698 } 1699 S += "}"; 1700 1701 return std::make_pair(T, S); 1702 } 1703 1704 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1705 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1706 std::pair<Type, std::string> B = 1707 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1708 assert_with_loc(B.first.isScalar(), 1709 "dup_typed() requires a scalar as the second argument"); 1710 Type T; 1711 // If the type argument is a constant string, construct the type directly. 
1712 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1713 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1714 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1715 } else 1716 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1717 1718 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1719 std::string S = "(" + T.str() + ") {"; 1720 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1721 if (I != 0) 1722 S += ", "; 1723 S += B.second; 1724 } 1725 S += "}"; 1726 1727 return std::make_pair(T, S); 1728 } 1729 1730 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1731 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1732 std::pair<Type, std::string> A = 1733 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1734 std::pair<Type, std::string> B = 1735 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1736 1737 assert_with_loc(B.first.isScalar(), 1738 "splat() requires a scalar int as the second argument"); 1739 1740 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1741 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1742 S += ", " + B.second; 1743 } 1744 S += ")"; 1745 1746 return std::make_pair(Intr.getBaseType(), S); 1747 } 1748 1749 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1750 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1751 std::pair<Type, std::string> A = 1752 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1753 1754 assert_with_loc(!A.first.isVoid(), 1755 "Argument to save_temp() must have non-void type!"); 1756 1757 std::string N = std::string(DI->getArgNameStr(0)); 1758 assert_with_loc(!N.empty(), 1759 "save_temp() expects a name as the first argument"); 1760 1761 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1762 "Variable already defined!"); 1763 Intr.Variables[N] 
= Variable(A.first, N + Intr.VariablePostfix); 1764 1765 std::string S = 1766 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1767 1768 return std::make_pair(Type::getVoid(), S); 1769 } 1770 1771 std::pair<Type, std::string> 1772 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1773 std::string S = Intr.Name; 1774 1775 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1776 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1777 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1778 1779 size_t Idx = S.find(ToReplace); 1780 1781 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1782 S.replace(Idx, ToReplace.size(), ReplaceWith); 1783 1784 return std::make_pair(Type::getVoid(), S); 1785 } 1786 1787 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1788 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1789 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1790 return std::make_pair(Type::fromTypedefName(Ty), Value); 1791 } 1792 1793 std::pair<Type, std::string> 1794 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1795 if (!ArgName.empty()) { 1796 assert_with_loc(!Arg->isComplete(), 1797 "Arguments must either be DAGs or names, not both!"); 1798 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1799 "Variable not defined!"); 1800 Variable &V = Intr.Variables[ArgName]; 1801 return std::make_pair(V.getType(), V.getName()); 1802 } 1803 1804 assert(Arg && "Neither ArgName nor Arg?!"); 1805 DagInit *DI = dyn_cast<DagInit>(Arg); 1806 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1807 1808 return emitDag(DI); 1809 } 1810 1811 std::string Intrinsic::generate() { 1812 // Avoid duplicated code for big and little endian 1813 if (isBigEndianSafe()) { 1814 generateImpl(false, "", ""); 1815 
return OS.str(); 1816 } 1817 // Little endian intrinsics are simple and don't require any argument 1818 // swapping. 1819 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1820 1821 generateImpl(false, "", ""); 1822 1823 OS << "#else\n"; 1824 1825 // Big endian intrinsics are more complex. The user intended these 1826 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1827 // but we load as-if (V)LD1. So we should swap all arguments and 1828 // swap the return value too. 1829 // 1830 // If we call sub-intrinsics, we should call a version that does 1831 // not re-swap the arguments! 1832 generateImpl(true, "", "__noswap_"); 1833 1834 // If we're needed early, create a non-swapping variant for 1835 // big-endian. 1836 if (NeededEarly) { 1837 generateImpl(false, "__noswap_", "__noswap_"); 1838 } 1839 OS << "#endif\n\n"; 1840 1841 return OS.str(); 1842 } 1843 1844 void Intrinsic::generateImpl(bool ReverseArguments, 1845 StringRef NamePrefix, StringRef CallPrefix) { 1846 CurrentRecord = R; 1847 1848 // If we call a macro, our local variables may be corrupted due to 1849 // lack of proper lexical scoping. So, add a globally unique postfix 1850 // to every variable. 1851 // 1852 // indexBody() should have set up the Dependencies set by now. 1853 for (auto *I : Dependencies) 1854 if (I->UseMacro) { 1855 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1856 break; 1857 } 1858 1859 initVariables(); 1860 1861 emitPrototype(NamePrefix); 1862 1863 if (IsUnavailable) { 1864 OS << " __attribute__((unavailable));"; 1865 } else { 1866 emitOpeningBrace(); 1867 // Emit return variable declaration first as to not trigger 1868 // -Wdeclaration-after-statement. 
1869 emitReturnVarDecl(); 1870 emitShadowedArgs(); 1871 if (ReverseArguments) 1872 emitArgumentReversal(); 1873 emitBody(CallPrefix); 1874 if (ReverseArguments) 1875 emitReturnReversal(); 1876 emitReturn(); 1877 emitClosingBrace(); 1878 } 1879 OS << "\n"; 1880 1881 CurrentRecord = nullptr; 1882 } 1883 1884 void Intrinsic::indexBody() { 1885 CurrentRecord = R; 1886 1887 initVariables(); 1888 // Emit return variable declaration first as to not trigger 1889 // -Wdeclaration-after-statement. 1890 emitReturnVarDecl(); 1891 emitBody(""); 1892 OS.str(""); 1893 1894 CurrentRecord = nullptr; 1895 } 1896 1897 //===----------------------------------------------------------------------===// 1898 // NeonEmitter implementation 1899 //===----------------------------------------------------------------------===// 1900 1901 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1902 std::optional<std::string> MangledName) { 1903 // First, look up the name in the intrinsic map. 1904 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1905 ("Intrinsic '" + Name + "' not found!").str()); 1906 auto &V = IntrinsicMap.find(Name.str())->second; 1907 std::vector<Intrinsic *> GoodVec; 1908 1909 // Create a string to print if we end up failing. 1910 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1911 for (unsigned I = 0; I < Types.size(); ++I) { 1912 if (I != 0) 1913 ErrMsg += ", "; 1914 ErrMsg += Types[I].str(); 1915 } 1916 ErrMsg += ")'\n"; 1917 ErrMsg += "Available overloads:\n"; 1918 1919 // Now, look through each intrinsic implementation and see if the types are 1920 // compatible. 
1921 for (auto &I : V) { 1922 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1923 ErrMsg += "("; 1924 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1925 if (A != 0) 1926 ErrMsg += ", "; 1927 ErrMsg += I.getParamType(A).str(); 1928 } 1929 ErrMsg += ")\n"; 1930 1931 if (MangledName && MangledName != I.getMangledName(true)) 1932 continue; 1933 1934 if (I.getNumParams() != Types.size()) 1935 continue; 1936 1937 unsigned ArgNum = 0; 1938 bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) { 1939 return Type == I.getParamType(ArgNum++); 1940 }); 1941 1942 if (MatchingArgumentTypes) 1943 GoodVec.push_back(&I); 1944 } 1945 1946 assert_with_loc(!GoodVec.empty(), 1947 "No compatible intrinsic found - " + ErrMsg); 1948 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1949 1950 return *GoodVec.front(); 1951 } 1952 1953 void NeonEmitter::createIntrinsic(Record *R, 1954 SmallVectorImpl<Intrinsic *> &Out) { 1955 std::string Name = std::string(R->getValueAsString("Name")); 1956 std::string Proto = std::string(R->getValueAsString("Prototype")); 1957 std::string Types = std::string(R->getValueAsString("Types")); 1958 Record *OperationRec = R->getValueAsDef("Operation"); 1959 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1960 std::string ArchGuard = std::string(R->getValueAsString("ArchGuard")); 1961 std::string TargetGuard = std::string(R->getValueAsString("TargetGuard")); 1962 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1963 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 1964 1965 // Set the global current record. This allows assert_with_loc to produce 1966 // decent location information even when highly nested. 
1967 CurrentRecord = R; 1968 1969 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1970 1971 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1972 1973 ClassKind CK = ClassNone; 1974 if (R->getSuperClasses().size() >= 2) 1975 CK = ClassMap[R->getSuperClasses()[1].first]; 1976 1977 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1978 if (!CartesianProductWith.empty()) { 1979 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 1980 for (auto TS : TypeSpecs) { 1981 Type DefaultT(TS, "."); 1982 for (auto SrcTS : ProductTypeSpecs) { 1983 Type DefaultSrcT(SrcTS, "."); 1984 if (TS == SrcTS || 1985 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1986 continue; 1987 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1988 } 1989 } 1990 } else { 1991 for (auto TS : TypeSpecs) { 1992 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1993 } 1994 } 1995 1996 llvm::sort(NewTypeSpecs); 1997 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1998 NewTypeSpecs.end()); 1999 auto &Entry = IntrinsicMap[Name]; 2000 2001 for (auto &I : NewTypeSpecs) { 2002 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 2003 ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe); 2004 Out.push_back(&Entry.back()); 2005 } 2006 2007 CurrentRecord = nullptr; 2008 } 2009 2010 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2011 /// declaration of builtins, checking for unique builtin declarations. 2012 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2013 SmallVectorImpl<Intrinsic *> &Defs) { 2014 OS << "#ifdef GET_NEON_BUILTINS\n"; 2015 2016 // We only want to emit a builtin once, and we want to emit them in 2017 // alphabetical order, so use a std::set. 
2018 std::set<std::pair<std::string, std::string>> Builtins; 2019 2020 for (auto *Def : Defs) { 2021 if (Def->hasBody()) 2022 continue; 2023 2024 std::string S = "__builtin_neon_" + Def->getMangledName() + ", \""; 2025 S += Def->getBuiltinTypeStr(); 2026 S += "\", \"n\""; 2027 2028 Builtins.emplace(S, Def->getTargetGuard()); 2029 } 2030 2031 for (auto &S : Builtins) { 2032 if (S.second == "") 2033 OS << "BUILTIN("; 2034 else 2035 OS << "TARGET_BUILTIN("; 2036 OS << S.first; 2037 if (S.second == "") 2038 OS << ")\n"; 2039 else 2040 OS << ", \"" << S.second << "\")\n"; 2041 } 2042 2043 OS << "#endif\n\n"; 2044 } 2045 2046 void NeonEmitter::genStreamingSVECompatibleList( 2047 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) { 2048 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n"; 2049 2050 std::set<std::string> Emitted; 2051 for (auto *Def : Defs) { 2052 // If the def has a body (that is, it has Operation DAGs), it won't call 2053 // __builtin_neon_* so we don't need to generate a definition for it. 2054 if (Def->hasBody()) 2055 continue; 2056 2057 std::string Name = Def->getMangledName(); 2058 if (Emitted.find(Name) != Emitted.end()) 2059 continue; 2060 2061 // FIXME: We should make exceptions here for some NEON builtins that are 2062 // permitted in streaming mode. 2063 OS << "case NEON::BI__builtin_neon_" << Name 2064 << ": BuiltinType = ArmNonStreaming; break;\n"; 2065 Emitted.insert(Name); 2066 } 2067 OS << "#endif\n\n"; 2068 } 2069 2070 /// Generate the ARM and AArch64 overloaded type checking code for 2071 /// SemaChecking.cpp, checking for unique builtin declarations. 2072 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2073 SmallVectorImpl<Intrinsic *> &Defs) { 2074 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2075 2076 // We record each overload check line before emitting because subsequent Inst 2077 // definitions may extend the number of permitted types (i.e. augment the 2078 // Mask). Use std::map to avoid sorting the table by hash number. 
2079 struct OverloadInfo { 2080 uint64_t Mask = 0ULL; 2081 int PtrArgNum = 0; 2082 bool HasConstPtr = false; 2083 OverloadInfo() = default; 2084 }; 2085 std::map<std::string, OverloadInfo> OverloadMap; 2086 2087 for (auto *Def : Defs) { 2088 // If the def has a body (that is, it has Operation DAGs), it won't call 2089 // __builtin_neon_* so we don't need to generate a definition for it. 2090 if (Def->hasBody()) 2091 continue; 2092 // Functions which have a scalar argument cannot be overloaded, no need to 2093 // check them if we are emitting the type checking code. 2094 if (Def->protoHasScalar()) 2095 continue; 2096 2097 uint64_t Mask = 0ULL; 2098 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2099 2100 // Check if the function has a pointer or const pointer argument. 2101 int PtrArgNum = -1; 2102 bool HasConstPtr = false; 2103 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2104 const auto &Type = Def->getParamType(I); 2105 if (Type.isPointer()) { 2106 PtrArgNum = I; 2107 HasConstPtr = Type.isConstPointer(); 2108 } 2109 } 2110 2111 // For sret builtins, adjust the pointer argument index. 2112 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2113 PtrArgNum += 1; 2114 2115 std::string Name = Def->getName(); 2116 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2117 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to 2118 // the vector element type with one of those operations causes codegen to 2119 // select an aligned load/store instruction. If you want an unaligned 2120 // operation, the pointer argument needs to have less alignment than element 2121 // type, so just accept any pointer type. 
2122 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" || 2123 Name == "vldap1_lane" || Name == "vstl1_lane") { 2124 PtrArgNum = -1; 2125 HasConstPtr = false; 2126 } 2127 2128 if (Mask) { 2129 std::string Name = Def->getMangledName(); 2130 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2131 OverloadInfo &OI = OverloadMap[Name]; 2132 OI.Mask |= Mask; 2133 OI.PtrArgNum |= PtrArgNum; 2134 OI.HasConstPtr = HasConstPtr; 2135 } 2136 } 2137 2138 for (auto &I : OverloadMap) { 2139 OverloadInfo &OI = I.second; 2140 2141 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2142 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2143 if (OI.PtrArgNum >= 0) 2144 OS << "; PtrArgNum = " << OI.PtrArgNum; 2145 if (OI.HasConstPtr) 2146 OS << "; HasConstPtr = true"; 2147 OS << "; break;\n"; 2148 } 2149 OS << "#endif\n\n"; 2150 } 2151 2152 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2153 SmallVectorImpl<Intrinsic *> &Defs) { 2154 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2155 2156 std::set<std::string> Emitted; 2157 2158 for (auto *Def : Defs) { 2159 if (Def->hasBody()) 2160 continue; 2161 // Functions which do not have an immediate do not need to have range 2162 // checking code emitted. 2163 if (!Def->hasImmediate()) 2164 continue; 2165 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2166 continue; 2167 2168 std::string LowerBound, UpperBound; 2169 2170 Record *R = Def->getRecord(); 2171 if (R->getValueAsBit("isVXAR")) { 2172 //VXAR takes an immediate in the range [0, 63] 2173 LowerBound = "0"; 2174 UpperBound = "63"; 2175 } else if (R->getValueAsBit("isVCVT_N")) { 2176 // VCVT between floating- and fixed-point values takes an immediate 2177 // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16. 
2178 LowerBound = "1"; 2179 if (Def->getBaseType().getElementSizeInBits() == 16 || 2180 Def->getName().find('h') != std::string::npos) 2181 // VCVTh operating on FP16 intrinsics in range [1, 16) 2182 UpperBound = "15"; 2183 else if (Def->getBaseType().getElementSizeInBits() == 32) 2184 UpperBound = "31"; 2185 else 2186 UpperBound = "63"; 2187 } else if (R->getValueAsBit("isScalarShift")) { 2188 // Right shifts have an 'r' in the name, left shifts do not. Convert 2189 // instructions have the same bounds and right shifts. 2190 if (Def->getName().find('r') != std::string::npos || 2191 Def->getName().find("cvt") != std::string::npos) 2192 LowerBound = "1"; 2193 2194 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2195 } else if (R->getValueAsBit("isShift")) { 2196 // Builtins which are overloaded by type will need to have their upper 2197 // bound computed at Sema time based on the type constant. 2198 2199 // Right shifts have an 'r' in the name, left shifts do not. 2200 if (Def->getName().find('r') != std::string::npos) 2201 LowerBound = "1"; 2202 UpperBound = "RFT(TV, true)"; 2203 } else if (Def->getClassKind(true) == ClassB) { 2204 // ClassB intrinsics have a type (and hence lane number) that is only 2205 // known at runtime. 2206 if (R->getValueAsBit("isLaneQ")) 2207 UpperBound = "RFT(TV, false, true)"; 2208 else 2209 UpperBound = "RFT(TV, false, false)"; 2210 } else { 2211 // The immediate generally refers to a lane in the preceding argument. 2212 assert(Def->getImmediateIdx() > 0); 2213 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2214 UpperBound = utostr(T.getNumElements() - 1); 2215 } 2216 2217 // Calculate the index of the immediate that should be range checked. 
2218 unsigned Idx = Def->getNumParams(); 2219 if (Def->hasImmediate()) 2220 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2221 2222 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2223 << "i = " << Idx << ";"; 2224 if (!LowerBound.empty()) 2225 OS << " l = " << LowerBound << ";"; 2226 if (!UpperBound.empty()) 2227 OS << " u = " << UpperBound << ";"; 2228 OS << " break;\n"; 2229 2230 Emitted.insert(Def->getMangledName()); 2231 } 2232 2233 OS << "#endif\n\n"; 2234 } 2235 2236 /// runHeader - Emit a file with sections defining: 2237 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2238 /// 2. the SemaChecking code for the type overload checking. 2239 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2240 void NeonEmitter::runHeader(raw_ostream &OS) { 2241 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2242 2243 SmallVector<Intrinsic *, 128> Defs; 2244 for (auto *R : RV) 2245 createIntrinsic(R, Defs); 2246 2247 // Generate shared BuiltinsXXX.def 2248 genBuiltinsDef(OS, Defs); 2249 2250 // Generate ARM overloaded type checking code for SemaChecking.cpp 2251 genOverloadTypeCheckCode(OS, Defs); 2252 2253 genStreamingSVECompatibleList(OS, Defs); 2254 2255 // Generate ARM range checking code for shift/lane immediates. 2256 genIntrinsicRangeCheckCode(OS, Defs); 2257 } 2258 2259 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2260 std::string TypedefTypes(types); 2261 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2262 2263 // Emit vector typedefs. 
2264 bool InIfdef = false; 2265 for (auto &TS : TDTypeVec) { 2266 bool IsA64 = false; 2267 Type T(TS, "."); 2268 if (T.isDouble()) 2269 IsA64 = true; 2270 2271 if (InIfdef && !IsA64) { 2272 OS << "#endif\n"; 2273 InIfdef = false; 2274 } 2275 if (!InIfdef && IsA64) { 2276 OS << "#ifdef __aarch64__\n"; 2277 InIfdef = true; 2278 } 2279 2280 if (T.isPoly()) 2281 OS << "typedef __attribute__((neon_polyvector_type("; 2282 else 2283 OS << "typedef __attribute__((neon_vector_type("; 2284 2285 Type T2 = T; 2286 T2.makeScalar(); 2287 OS << T.getNumElements() << "))) "; 2288 OS << T2.str(); 2289 OS << " " << T.str() << ";\n"; 2290 } 2291 if (InIfdef) 2292 OS << "#endif\n"; 2293 OS << "\n"; 2294 2295 // Emit struct typedefs. 2296 InIfdef = false; 2297 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2298 for (auto &TS : TDTypeVec) { 2299 bool IsA64 = false; 2300 Type T(TS, "."); 2301 if (T.isDouble()) 2302 IsA64 = true; 2303 2304 if (InIfdef && !IsA64) { 2305 OS << "#endif\n"; 2306 InIfdef = false; 2307 } 2308 if (!InIfdef && IsA64) { 2309 OS << "#ifdef __aarch64__\n"; 2310 InIfdef = true; 2311 } 2312 2313 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2314 Type VT(TS, Mods); 2315 OS << "typedef struct " << VT.str() << " {\n"; 2316 OS << " " << T.str() << " val"; 2317 OS << "[" << NumMembers << "]"; 2318 OS << ";\n} "; 2319 OS << VT.str() << ";\n"; 2320 OS << "\n"; 2321 } 2322 } 2323 if (InIfdef) 2324 OS << "#endif\n"; 2325 } 2326 2327 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2328 /// is comprised of type definitions and function declarations. 
2329 void NeonEmitter::run(raw_ostream &OS) { 2330 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2331 "------------------------------" 2332 "---===\n" 2333 " *\n" 2334 " * Permission is hereby granted, free of charge, to any person " 2335 "obtaining " 2336 "a copy\n" 2337 " * of this software and associated documentation files (the " 2338 "\"Software\")," 2339 " to deal\n" 2340 " * in the Software without restriction, including without limitation " 2341 "the " 2342 "rights\n" 2343 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2344 "and/or sell\n" 2345 " * copies of the Software, and to permit persons to whom the Software " 2346 "is\n" 2347 " * furnished to do so, subject to the following conditions:\n" 2348 " *\n" 2349 " * The above copyright notice and this permission notice shall be " 2350 "included in\n" 2351 " * all copies or substantial portions of the Software.\n" 2352 " *\n" 2353 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2354 "EXPRESS OR\n" 2355 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2356 "MERCHANTABILITY,\n" 2357 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2358 "SHALL THE\n" 2359 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2360 "OTHER\n" 2361 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2362 "ARISING FROM,\n" 2363 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2364 "DEALINGS IN\n" 2365 " * THE SOFTWARE.\n" 2366 " *\n" 2367 " *===-----------------------------------------------------------------" 2368 "---" 2369 "---===\n" 2370 " */\n\n"; 2371 2372 OS << "#ifndef __ARM_NEON_H\n"; 2373 OS << "#define __ARM_NEON_H\n\n"; 2374 2375 OS << "#ifndef __ARM_FP\n"; 2376 OS << "#error \"NEON intrinsics not available with the soft-float ABI. 
" 2377 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2378 OS << "#else\n\n"; 2379 2380 OS << "#if !defined(__ARM_NEON)\n"; 2381 OS << "#error \"NEON support not enabled\"\n"; 2382 OS << "#else\n\n"; 2383 2384 OS << "#include <stdint.h>\n\n"; 2385 2386 OS << "#include <arm_bf16.h>\n"; 2387 2388 OS << "#include <arm_vector_types.h>\n"; 2389 2390 // For now, signedness of polynomial types depends on target 2391 OS << "#ifdef __aarch64__\n"; 2392 OS << "typedef uint8_t poly8_t;\n"; 2393 OS << "typedef uint16_t poly16_t;\n"; 2394 OS << "typedef uint64_t poly64_t;\n"; 2395 OS << "typedef __uint128_t poly128_t;\n"; 2396 OS << "#else\n"; 2397 OS << "typedef int8_t poly8_t;\n"; 2398 OS << "typedef int16_t poly16_t;\n"; 2399 OS << "typedef int64_t poly64_t;\n"; 2400 OS << "#endif\n"; 2401 emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS); 2402 2403 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2404 "__nodebug__))\n\n"; 2405 2406 SmallVector<Intrinsic *, 128> Defs; 2407 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2408 for (auto *R : RV) 2409 createIntrinsic(R, Defs); 2410 2411 for (auto *I : Defs) 2412 I->indexBody(); 2413 2414 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2415 2416 // Only emit a def when its requirements have been met. 2417 // FIXME: This loop could be made faster, but it's fast enough for now. 2418 bool MadeProgress = true; 2419 std::string InGuard; 2420 while (!Defs.empty() && MadeProgress) { 2421 MadeProgress = false; 2422 2423 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2424 I != Defs.end(); /*No step*/) { 2425 bool DependenciesSatisfied = true; 2426 for (auto *II : (*I)->getDependencies()) { 2427 if (llvm::is_contained(Defs, II)) 2428 DependenciesSatisfied = false; 2429 } 2430 if (!DependenciesSatisfied) { 2431 // Try the next one. 2432 ++I; 2433 continue; 2434 } 2435 2436 // Emit #endif/#if pair if needed. 
2437 if ((*I)->getArchGuard() != InGuard) { 2438 if (!InGuard.empty()) 2439 OS << "#endif\n"; 2440 InGuard = (*I)->getArchGuard(); 2441 if (!InGuard.empty()) 2442 OS << "#if " << InGuard << "\n"; 2443 } 2444 2445 // Actually generate the intrinsic code. 2446 OS << (*I)->generate(); 2447 2448 MadeProgress = true; 2449 I = Defs.erase(I); 2450 } 2451 } 2452 assert(Defs.empty() && "Some requirements were not satisfied!"); 2453 if (!InGuard.empty()) 2454 OS << "#endif\n"; 2455 2456 OS << "\n"; 2457 OS << "#undef __ai\n\n"; 2458 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2459 OS << "#endif /* ifndef __ARM_FP */\n"; 2460 OS << "#endif /* __ARM_NEON_H */\n"; 2461 } 2462 2463 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2464 /// is comprised of type definitions and function declarations. 2465 void NeonEmitter::runFP16(raw_ostream &OS) { 2466 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2467 "------------------------------" 2468 "---===\n" 2469 " *\n" 2470 " * Permission is hereby granted, free of charge, to any person " 2471 "obtaining a copy\n" 2472 " * of this software and associated documentation files (the " 2473 "\"Software\"), to deal\n" 2474 " * in the Software without restriction, including without limitation " 2475 "the rights\n" 2476 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2477 "and/or sell\n" 2478 " * copies of the Software, and to permit persons to whom the Software " 2479 "is\n" 2480 " * furnished to do so, subject to the following conditions:\n" 2481 " *\n" 2482 " * The above copyright notice and this permission notice shall be " 2483 "included in\n" 2484 " * all copies or substantial portions of the Software.\n" 2485 " *\n" 2486 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2487 "EXPRESS OR\n" 2488 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2489 "MERCHANTABILITY,\n" 2490 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2491 "SHALL THE\n" 2492 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2493 "OTHER\n" 2494 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2495 "ARISING FROM,\n" 2496 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2497 "DEALINGS IN\n" 2498 " * THE SOFTWARE.\n" 2499 " *\n" 2500 " *===-----------------------------------------------------------------" 2501 "---" 2502 "---===\n" 2503 " */\n\n"; 2504 2505 OS << "#ifndef __ARM_FP16_H\n"; 2506 OS << "#define __ARM_FP16_H\n\n"; 2507 2508 OS << "#include <stdint.h>\n\n"; 2509 2510 OS << "typedef __fp16 float16_t;\n"; 2511 2512 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2513 "__nodebug__))\n\n"; 2514 2515 SmallVector<Intrinsic *, 128> Defs; 2516 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2517 for (auto *R : RV) 2518 createIntrinsic(R, Defs); 2519 2520 for (auto *I : Defs) 2521 I->indexBody(); 2522 2523 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2524 2525 // Only emit a def when its requirements have been met. 2526 // FIXME: This loop could be made faster, but it's fast enough for now. 2527 bool MadeProgress = true; 2528 std::string InGuard; 2529 while (!Defs.empty() && MadeProgress) { 2530 MadeProgress = false; 2531 2532 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2533 I != Defs.end(); /*No step*/) { 2534 bool DependenciesSatisfied = true; 2535 for (auto *II : (*I)->getDependencies()) { 2536 if (llvm::is_contained(Defs, II)) 2537 DependenciesSatisfied = false; 2538 } 2539 if (!DependenciesSatisfied) { 2540 // Try the next one. 2541 ++I; 2542 continue; 2543 } 2544 2545 // Emit #endif/#if pair if needed. 2546 if ((*I)->getArchGuard() != InGuard) { 2547 if (!InGuard.empty()) 2548 OS << "#endif\n"; 2549 InGuard = (*I)->getArchGuard(); 2550 if (!InGuard.empty()) 2551 OS << "#if " << InGuard << "\n"; 2552 } 2553 2554 // Actually generate the intrinsic code. 
2555 OS << (*I)->generate(); 2556 2557 MadeProgress = true; 2558 I = Defs.erase(I); 2559 } 2560 } 2561 assert(Defs.empty() && "Some requirements were not satisfied!"); 2562 if (!InGuard.empty()) 2563 OS << "#endif\n"; 2564 2565 OS << "\n"; 2566 OS << "#undef __ai\n\n"; 2567 OS << "#endif /* __ARM_FP16_H */\n"; 2568 } 2569 2570 void NeonEmitter::runVectorTypes(raw_ostream &OS) { 2571 OS << "/*===---- arm_vector_types - ARM vector type " 2572 "------===\n" 2573 " *\n" 2574 " *\n" 2575 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2576 "Exceptions.\n" 2577 " * See https://llvm.org/LICENSE.txt for license information.\n" 2578 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2579 " *\n" 2580 " *===-----------------------------------------------------------------" 2581 "------===\n" 2582 " */\n\n"; 2583 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n"; 2584 OS << "#error \"This file should not be used standalone. Please include" 2585 " arm_neon.h or arm_sve.h instead\"\n\n"; 2586 OS << "#endif\n"; 2587 OS << "#ifndef __ARM_NEON_TYPES_H\n"; 2588 OS << "#define __ARM_NEON_TYPES_H\n"; 2589 OS << "typedef float float32_t;\n"; 2590 OS << "typedef __fp16 float16_t;\n"; 2591 2592 OS << "#ifdef __aarch64__\n"; 2593 OS << "typedef double float64_t;\n"; 2594 OS << "#endif\n\n"; 2595 2596 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS); 2597 2598 emitNeonTypeDefs("bQb", OS); 2599 OS << "#endif // __ARM_NEON_TYPES_H\n"; 2600 } 2601 2602 void NeonEmitter::runBF16(raw_ostream &OS) { 2603 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2604 "-----------------------------------===\n" 2605 " *\n" 2606 " *\n" 2607 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2608 "Exceptions.\n" 2609 " * See https://llvm.org/LICENSE.txt for license information.\n" 2610 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2611 " *\n" 2612 " 
*===-----------------------------------------------------------------" 2613 "------===\n" 2614 " */\n\n"; 2615 2616 OS << "#ifndef __ARM_BF16_H\n"; 2617 OS << "#define __ARM_BF16_H\n\n"; 2618 2619 OS << "typedef __bf16 bfloat16_t;\n"; 2620 2621 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2622 "__nodebug__))\n\n"; 2623 2624 SmallVector<Intrinsic *, 128> Defs; 2625 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2626 for (auto *R : RV) 2627 createIntrinsic(R, Defs); 2628 2629 for (auto *I : Defs) 2630 I->indexBody(); 2631 2632 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2633 2634 // Only emit a def when its requirements have been met. 2635 // FIXME: This loop could be made faster, but it's fast enough for now. 2636 bool MadeProgress = true; 2637 std::string InGuard; 2638 while (!Defs.empty() && MadeProgress) { 2639 MadeProgress = false; 2640 2641 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2642 I != Defs.end(); /*No step*/) { 2643 bool DependenciesSatisfied = true; 2644 for (auto *II : (*I)->getDependencies()) { 2645 if (llvm::is_contained(Defs, II)) 2646 DependenciesSatisfied = false; 2647 } 2648 if (!DependenciesSatisfied) { 2649 // Try the next one. 2650 ++I; 2651 continue; 2652 } 2653 2654 // Emit #endif/#if pair if needed. 2655 if ((*I)->getArchGuard() != InGuard) { 2656 if (!InGuard.empty()) 2657 OS << "#endif\n"; 2658 InGuard = (*I)->getArchGuard(); 2659 if (!InGuard.empty()) 2660 OS << "#if " << InGuard << "\n"; 2661 } 2662 2663 // Actually generate the intrinsic code. 
2664 OS << (*I)->generate(); 2665 2666 MadeProgress = true; 2667 I = Defs.erase(I); 2668 } 2669 } 2670 assert(Defs.empty() && "Some requirements were not satisfied!"); 2671 if (!InGuard.empty()) 2672 OS << "#endif\n"; 2673 2674 OS << "\n"; 2675 OS << "#undef __ai\n\n"; 2676 2677 OS << "#endif\n"; 2678 } 2679 2680 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2681 NeonEmitter(Records).run(OS); 2682 } 2683 2684 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2685 NeonEmitter(Records).runFP16(OS); 2686 } 2687 2688 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { 2689 NeonEmitter(Records).runBF16(OS); 2690 } 2691 2692 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2693 NeonEmitter(Records).runHeader(OS); 2694 } 2695 2696 void clang::EmitVectorTypes(RecordKeeper &Records, raw_ostream &OS) { 2697 NeonEmitter(Records).runVectorTypes(OS); 2698 } 2699 2700 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2701 llvm_unreachable("Neon test generation no longer implemented!"); 2702 } 2703