1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting arm_neon.h, which includes 10 // a declaration and definition of each function specified by the ARM NEON 11 // compiler interface. See ARM document DUI0348B. 12 // 13 // Each NEON instruction is implemented in terms of 1 or more functions which 14 // are suffixed with the element type of the input vectors. Functions may be 15 // implemented in terms of generic vector operations such as +, *, -, etc. or 16 // by calling a __builtin_-prefixed function which will be handled by clang's 17 // CodeGen library. 18 // 19 // Additional validation code can be generated by this file when runHeader() is 20 // called, rather than the normal run() entry point. 21 // 22 // See also the documentation in include/clang/Basic/arm_neon.td. 
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include "llvm/TableGen/SetTheory.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <utility> 52 #include <vector> 53 54 using namespace llvm; 55 56 namespace { 57 58 // While globals are generally bad, this one allows us to perform assertions 59 // liberally and somehow still trace them back to the def they indirectly 60 // came from. 61 static Record *CurrentRecord = nullptr; 62 static void assert_with_loc(bool Assertion, const std::string &Str) { 63 if (!Assertion) { 64 if (CurrentRecord) 65 PrintFatalError(CurrentRecord->getLoc(), Str); 66 else 67 PrintFatalError(Str); 68 } 69 } 70 71 enum ClassKind { 72 ClassNone, 73 ClassI, // generic integer instruction, e.g., "i8" suffix 74 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 75 ClassW, // width-specific instruction, e.g., "8" suffix 76 ClassB, // bitcast arguments with enum argument to specify type 77 ClassL, // Logical instructions which are op instructions 78 // but we need to not emit any suffix for in our 79 // tests. 80 ClassNoTest // Instructions which we do not test since they are 81 // not TRUE instructions. 82 }; 83 84 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 85 /// builtins. 
These must be kept in sync with the flags in 86 /// include/clang/Basic/TargetBuiltins.h. 87 namespace NeonTypeFlags { 88 89 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 90 91 enum EltType { 92 Int8, 93 Int16, 94 Int32, 95 Int64, 96 Poly8, 97 Poly16, 98 Poly64, 99 Poly128, 100 Float16, 101 Float32, 102 Float64, 103 BFloat16 104 }; 105 106 } // end namespace NeonTypeFlags 107 108 class NeonEmitter; 109 110 //===----------------------------------------------------------------------===// 111 // TypeSpec 112 //===----------------------------------------------------------------------===// 113 114 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 115 /// for strong typing purposes. 116 /// 117 /// A TypeSpec can be used to create a type. 118 class TypeSpec : public std::string { 119 public: 120 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 121 std::vector<TypeSpec> Ret; 122 TypeSpec Acc; 123 for (char I : Str.str()) { 124 if (islower(I)) { 125 Acc.push_back(I); 126 Ret.push_back(TypeSpec(Acc)); 127 Acc.clear(); 128 } else { 129 Acc.push_back(I); 130 } 131 } 132 return Ret; 133 } 134 }; 135 136 //===----------------------------------------------------------------------===// 137 // Type 138 //===----------------------------------------------------------------------===// 139 140 /// A Type. Not much more to say here. 141 class Type { 142 private: 143 TypeSpec TS; 144 145 enum TypeKind { 146 Void, 147 Float, 148 SInt, 149 UInt, 150 Poly, 151 BFloat16, 152 }; 153 TypeKind Kind; 154 bool Immediate, Constant, Pointer; 155 // ScalarForMangling and NoManglingQ are really not suited to live here as 156 // they are not related to the type. But they live in the TypeSpec (not the 157 // prototype), so this is really the only place to store them. 
  bool ScalarForMangling, NoManglingQ;
  // Bitwidth is the width of one whole vector (or of the scalar),
  // ElementBitwidth the width of one element; NumVectors == 0 is what
  // isScalar() tests for.
  unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
  /// Construct the "void" type: Kind is Void, all flags false, all widths 0.
  Type()
      : Kind(Void), Immediate(false), Constant(false),
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

  /// Construct a type from typespec \p TS, then apply the prototype
  /// modifiers in \p CharMods via applyModifiers().
  Type(TypeSpec TS, StringRef CharMods)
      : TS(std::move(TS)), Kind(Void), Immediate(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    applyModifiers(CharMods);
  }

  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  // Two types compare equal when their C string representations match.
  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions
  //
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  // A "value" is anything that is neither void nor a pointer.
  bool isValue() const { return !isVoid() && !isPointer(); }
  bool isScalar() const { return isValue() && NumVectors == 0; }
  bool isVector() const { return isValue() && NumVectors > 0; }
  bool isConstPointer() const { return Constant; }
  bool isFloating() const { return Kind == Float; }
  bool isInteger() const { return Kind == SInt || Kind == UInt; }
  bool isPoly() const { return Kind == Poly; }
  bool isSigned() const { return Kind == SInt; }
  bool isImmediate() const { return Immediate; }
  bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
  bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
  bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
  // Note: unlike isShort/isInt/isLong this only checks the element width,
  // not the kind.
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return isInteger() && ElementBitwidth == 16; }
  bool isInt() const { return isInteger() && ElementBitwidth == 32; }
  bool isLong() const { return isInteger() && ElementBitwidth == 64; }
  bool isVoid() const { return Kind == Void; }
  bool isBFloat16() const { return Kind == BFloat16; }
  // NOTE(review): divides by ElementBitwidth, which is 0 for a
  // default-constructed (void) type - presumably only queried on non-void
  // types; confirm against callers.
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions
  //
  void makeUnsigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = UInt;
  }
  void makeSigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = SInt;
  }

  /// Turn this type into a (non-immediate) integer with elements of
  /// \p ElemWidth bits; \p Sign selects signed versus unsigned.
  void makeInteger(unsigned ElemWidth, bool Sign) {
    assert(!isVoid() && "converting void to int probably not useful");
    Kind = Sign ? SInt : UInt;
    Immediate = false;
    ElementBitwidth = ElemWidth;
  }

  /// Turn this type into a signed immediate with elements of \p ElemWidth
  /// bits.
  void makeImmediate(unsigned ElemWidth) {
    Kind = SInt;
    Immediate = true;
    ElementBitwidth = ElemWidth;
  }

  /// Collapse to a scalar: the total width becomes the element width.
  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }

  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }

  void make32BitElement() {
    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
    ElementBitwidth = 32;
  }

  /// Widen the type to 128 bits; a fatal error if it is already 128 bits.
  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }

  /// Narrow the type to 64 bits; a fatal error if it is already 64 bits.
  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
  unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
  /// Applies prototype modifiers to the type.
  void applyModifiers(StringRef Mods);
};

//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//

/// A variable is a simple class that just has a type and a name.
class Variable {
  Type T;
  std::string N;

public:
  /// Construct a variable of void type with an empty name.
  Variable() : T(Type::getVoid()), N("") {}
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}

  Type getType() const { return T; }
  /// Return the stored name prefixed with "__".
  std::string getName() const { return "__" + N; }
};

//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//

/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  /// The Record this intrinsic was created from.
  Record *R;
  /// The unmangled name.
  std::string Name;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductWith is non-empty - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  ListInit *Body;
  /// The architectural #ifdef guard.
  std::string Guard;
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian, or do its arguments need
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
  int PolymorphicKeyType;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  /// function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type(OutTS, "."). InBaseType is only
  /// different if CartesianProductWith is non-empty (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
352 std::string VariablePostfix; 353 354 NeonEmitter &Emitter; 355 std::stringstream OS; 356 357 bool isBigEndianSafe() const { 358 if (BigEndianSafe) 359 return true; 360 361 for (const auto &T : Types){ 362 if (T.isVector() && T.getNumElements() > 1) 363 return false; 364 } 365 return true; 366 } 367 368 public: 369 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 370 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 371 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 372 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 373 Guard(Guard.str()), IsUnavailable(IsUnavailable), 374 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 375 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 376 Emitter(Emitter) { 377 // Modify the TypeSpec per-argument to get a concrete Type, and create 378 // known variables for each. 379 // Types[0] is the return value. 380 unsigned Pos = 0; 381 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 382 StringRef Mods = getNextModifiers(Proto, Pos); 383 while (!Mods.empty()) { 384 Types.emplace_back(InTS, Mods); 385 if (Mods.find("!") != StringRef::npos) 386 PolymorphicKeyType = Types.size() - 1; 387 388 Mods = getNextModifiers(Proto, Pos); 389 } 390 391 for (auto Type : Types) { 392 // If this builtin takes an immediate argument, we need to #define it rather 393 // than use a standard declaration, so that SemaChecking can range check 394 // the immediate passed by the user. 395 396 // Pointer arguments need to use macros to avoid hiding aligned attributes 397 // from the pointer type. 398 399 // It is not permitted to pass or return an __fp16 by value, so intrinsics 400 // taking a scalar float16_t must be implemented as macros. 401 if (Type.isImmediate() || Type.isPointer() || 402 (Type.isScalar() && Type.isHalf())) 403 UseMacro = true; 404 } 405 } 406 407 /// Get the Record that this intrinsic is based off. 
408 Record *getRecord() const { return R; } 409 /// Get the set of Intrinsics that this intrinsic calls. 410 /// this is the set of immediate dependencies, NOT the 411 /// transitive closure. 412 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 413 /// Get the architectural guard string (#ifdef). 414 std::string getGuard() const { return Guard; } 415 /// Get the non-mangled name. 416 std::string getName() const { return Name; } 417 418 /// Return true if the intrinsic takes an immediate operand. 419 bool hasImmediate() const { 420 return std::any_of(Types.begin(), Types.end(), 421 [](const Type &T) { return T.isImmediate(); }); 422 } 423 424 /// Return the parameter index of the immediate operand. 425 unsigned getImmediateIdx() const { 426 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 427 if (Types[Idx].isImmediate()) 428 return Idx - 1; 429 llvm_unreachable("Intrinsic has no immediate"); 430 } 431 432 433 unsigned getNumParams() const { return Types.size() - 1; } 434 Type getReturnType() const { return Types[0]; } 435 Type getParamType(unsigned I) const { return Types[I + 1]; } 436 Type getBaseType() const { return BaseType; } 437 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 438 439 /// Return true if the prototype has a scalar argument. 440 bool protoHasScalar() const; 441 442 /// Return the index that parameter PIndex will sit at 443 /// in a generated function call. This is often just PIndex, 444 /// but may not be as things such as multiple-vector operands 445 /// and sret parameters need to be taken into accont. 446 unsigned getGeneratedParamIdx(unsigned PIndex) { 447 unsigned Idx = 0; 448 if (getReturnType().getNumVectors() > 1) 449 // Multiple vectors are passed as sret. 
450 ++Idx; 451 452 for (unsigned I = 0; I < PIndex; ++I) 453 Idx += std::max(1U, getParamType(I).getNumVectors()); 454 455 return Idx; 456 } 457 458 bool hasBody() const { return Body && !Body->getValues().empty(); } 459 460 void setNeededEarly() { NeededEarly = true; } 461 462 bool operator<(const Intrinsic &Other) const { 463 // Sort lexicographically on a two-tuple (Guard, Name) 464 if (Guard != Other.Guard) 465 return Guard < Other.Guard; 466 return Name < Other.Name; 467 } 468 469 ClassKind getClassKind(bool UseClassBIfScalar = false) { 470 if (UseClassBIfScalar && !protoHasScalar()) 471 return ClassB; 472 return CK; 473 } 474 475 /// Return the name, mangled with type information. 476 /// If ForceClassS is true, use ClassS (u32/s32) instead 477 /// of the intrinsic's own type class. 478 std::string getMangledName(bool ForceClassS = false) const; 479 /// Return the type code for a builtin function call. 480 std::string getInstTypeCode(Type T, ClassKind CK) const; 481 /// Return the type string for a BUILTIN() macro in Builtins.def. 482 std::string getBuiltinTypeStr(); 483 484 /// Generate the intrinsic, returning code. 485 std::string generate(); 486 /// Perform type checking and populate the dependency graph, but 487 /// don't generate code yet. 
  void indexBody();

private:
  /// Return the next modifier group of \p Proto starting at \p Pos (either a
  /// single character or a parenthesised group) and advance \p Pos past it.
  StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;

  std::string mangleName(std::string Name, ClassKind CK) const;

  void initVariables();
  /// Replace each "$name" in \p S with the generated name of the variable
  /// called "name".
  std::string replaceParamsIn(std::string S);

  void emitBodyAsBuiltinCall();

  void generateImpl(bool ReverseArguments,
                    StringRef NamePrefix, StringRef CallPrefix);
  void emitReturn();
  void emitBody(StringRef CallPrefix);
  void emitShadowedArgs();
  void emitArgumentReversal();
  void emitReturnReversal();
  void emitReverseVariable(Variable &Dest, Variable &Src);
  void emitNewLine();
  void emitClosingBrace();
  void emitOpeningBrace();
  void emitPrototype(StringRef NamePrefix);

  /// Helper that emits code for the DAG operators of an intrinsic body.
  /// Each emit function returns the resulting Type together with a string.
  class DagEmitter {
    Intrinsic &Intr;
    StringRef CallPrefix;

  public:
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
      Intr(Intr), CallPrefix(CallPrefix) {
    }
    std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    std::pair<Type, std::string> emitDagDup(DagInit *DI);
    std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
    std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    std::pair<Type, std::string> emitDagCall(DagInit *DI,
                                             bool MatchMangledName);
    std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    std::pair<Type, std::string> emitDagOp(DagInit *DI);
    std::pair<Type, std::string> emitDag(DagInit *DI);
  };
};

//===----------------------------------------------------------------------===//
// NeonEmitter
//===----------------------------------------------------------------------===//

class NeonEmitter {
  RecordKeeper &Records;
  /// Maps the TableGen instruction classes looked up in the constructor to
  /// their ClassKind.
  DenseMap<Record *, ClassKind> ClassMap;
  std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
  unsigned UniqueNumber;

  void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                SmallVectorImpl<Intrinsic *> &Defs);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  SmallVectorImpl<Intrinsic *> &Defs);

public:
  /// Called by Intrinsic - this attempts to get an intrinsic that takes
  /// the given types as arguments.
  Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
                          Optional<std::string> MangledName);

  /// Called by Intrinsic - returns a globally-unique number.
  unsigned getUniqueNumber() { return UniqueNumber++; }

  /// Look up the instruction classes by name and record the ClassKind each
  /// one maps to. The "Op" variants share the kind of their plain
  /// counterpart (SOpInst -> ClassS, IOpInst -> ClassI, WOpInst -> ClassW).
  NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runFP16 - Emit arm_fp16.h.inc
  void runFP16(raw_ostream &o);

  // runBF16 - Emit arm_bf16.h.inc
  void runBF16(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h,
  // arm_fp16.h and arm_bf16.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
597 void runTests(raw_ostream &o); 598 }; 599 600 } // end anonymous namespace 601 602 //===----------------------------------------------------------------------===// 603 // Type implementation 604 //===----------------------------------------------------------------------===// 605 606 std::string Type::str() const { 607 if (isVoid()) 608 return "void"; 609 std::string S; 610 611 if (isInteger() && !isSigned()) 612 S += "u"; 613 614 if (isPoly()) 615 S += "poly"; 616 else if (isFloating()) 617 S += "float"; 618 else if (isBFloat16()) 619 S += "bfloat"; 620 else 621 S += "int"; 622 623 S += utostr(ElementBitwidth); 624 if (isVector()) 625 S += "x" + utostr(getNumElements()); 626 if (NumVectors > 1) 627 S += "x" + utostr(NumVectors); 628 S += "_t"; 629 630 if (Constant) 631 S += " const"; 632 if (Pointer) 633 S += " *"; 634 635 return S; 636 } 637 638 std::string Type::builtin_str() const { 639 std::string S; 640 if (isVoid()) 641 return "v"; 642 643 if (isPointer()) { 644 // All pointers are void pointers. 645 S = "v"; 646 if (isConstPointer()) 647 S += "C"; 648 S += "*"; 649 return S; 650 } else if (isInteger()) 651 switch (ElementBitwidth) { 652 case 8: S += "c"; break; 653 case 16: S += "s"; break; 654 case 32: S += "i"; break; 655 case 64: S += "Wi"; break; 656 case 128: S += "LLLi"; break; 657 default: llvm_unreachable("Unhandled case!"); 658 } 659 else if (isBFloat16()) { 660 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 661 S += "y"; 662 } else 663 switch (ElementBitwidth) { 664 case 16: S += "h"; break; 665 case 32: S += "f"; break; 666 case 64: S += "d"; break; 667 default: llvm_unreachable("Unhandled case!"); 668 } 669 670 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 671 if (isChar() && !isPointer() && isSigned()) 672 // Make chars explicitly signed. 
673 S = "S" + S; 674 else if (isInteger() && !isSigned()) 675 S = "U" + S; 676 677 // Constant indices are "int", but have the "constant expression" modifier. 678 if (isImmediate()) { 679 assert(isInteger() && isSigned()); 680 S = "I" + S; 681 } 682 683 if (isScalar()) 684 return S; 685 686 std::string Ret; 687 for (unsigned I = 0; I < NumVectors; ++I) 688 Ret += "V" + utostr(getNumElements()) + S; 689 690 return Ret; 691 } 692 693 unsigned Type::getNeonEnum() const { 694 unsigned Addend; 695 switch (ElementBitwidth) { 696 case 8: Addend = 0; break; 697 case 16: Addend = 1; break; 698 case 32: Addend = 2; break; 699 case 64: Addend = 3; break; 700 case 128: Addend = 4; break; 701 default: llvm_unreachable("Unhandled element bitwidth!"); 702 } 703 704 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 705 if (isPoly()) { 706 // Adjustment needed because Poly32 doesn't exist. 707 if (Addend >= 2) 708 --Addend; 709 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 710 } 711 if (isFloating()) { 712 assert(Addend != 0 && "Float8 doesn't exist!"); 713 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 714 } 715 716 if (isBFloat16()) { 717 assert(Addend == 1 && "BFloat16 is only 16 bit"); 718 Base = (unsigned)NeonTypeFlags::BFloat16; 719 } 720 721 if (Bitwidth == 128) 722 Base |= (unsigned)NeonTypeFlags::QuadFlag; 723 if (isInteger() && !isSigned()) 724 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 725 726 return Base; 727 } 728 729 Type Type::fromTypedefName(StringRef Name) { 730 Type T; 731 T.Kind = SInt; 732 733 if (Name.front() == 'u') { 734 T.Kind = UInt; 735 Name = Name.drop_front(); 736 } 737 738 if (Name.startswith("float")) { 739 T.Kind = Float; 740 Name = Name.drop_front(5); 741 } else if (Name.startswith("poly")) { 742 T.Kind = Poly; 743 Name = Name.drop_front(4); 744 } else if (Name.startswith("bfloat")) { 745 T.Kind = BFloat16; 746 Name = Name.drop_front(6); 747 } else { 748 assert(Name.startswith("int")); 749 Name = Name.drop_front(3); 750 } 751 752 
unsigned I = 0; 753 for (I = 0; I < Name.size(); ++I) { 754 if (!isdigit(Name[I])) 755 break; 756 } 757 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 758 Name = Name.drop_front(I); 759 760 T.Bitwidth = T.ElementBitwidth; 761 T.NumVectors = 1; 762 763 if (Name.front() == 'x') { 764 Name = Name.drop_front(); 765 unsigned I = 0; 766 for (I = 0; I < Name.size(); ++I) { 767 if (!isdigit(Name[I])) 768 break; 769 } 770 unsigned NumLanes; 771 Name.substr(0, I).getAsInteger(10, NumLanes); 772 Name = Name.drop_front(I); 773 T.Bitwidth = T.ElementBitwidth * NumLanes; 774 } else { 775 // Was scalar. 776 T.NumVectors = 0; 777 } 778 if (Name.front() == 'x') { 779 Name = Name.drop_front(); 780 unsigned I = 0; 781 for (I = 0; I < Name.size(); ++I) { 782 if (!isdigit(Name[I])) 783 break; 784 } 785 Name.substr(0, I).getAsInteger(10, T.NumVectors); 786 Name = Name.drop_front(I); 787 } 788 789 assert(Name.startswith("_t") && "Malformed typedef!"); 790 return T; 791 } 792 793 void Type::applyTypespec(bool &Quad) { 794 std::string S = TS; 795 ScalarForMangling = false; 796 Kind = SInt; 797 ElementBitwidth = ~0U; 798 NumVectors = 1; 799 800 for (char I : S) { 801 switch (I) { 802 case 'S': 803 ScalarForMangling = true; 804 break; 805 case 'H': 806 NoManglingQ = true; 807 Quad = true; 808 break; 809 case 'Q': 810 Quad = true; 811 break; 812 case 'P': 813 Kind = Poly; 814 break; 815 case 'U': 816 Kind = UInt; 817 break; 818 case 'c': 819 ElementBitwidth = 8; 820 break; 821 case 'h': 822 Kind = Float; 823 LLVM_FALLTHROUGH; 824 case 's': 825 ElementBitwidth = 16; 826 break; 827 case 'f': 828 Kind = Float; 829 LLVM_FALLTHROUGH; 830 case 'i': 831 ElementBitwidth = 32; 832 break; 833 case 'd': 834 Kind = Float; 835 LLVM_FALLTHROUGH; 836 case 'l': 837 ElementBitwidth = 64; 838 break; 839 case 'k': 840 ElementBitwidth = 128; 841 // Poly doesn't have a 128x1 type. 
842 if (isPoly()) 843 NumVectors = 0; 844 break; 845 case 'b': 846 Kind = BFloat16; 847 ElementBitwidth = 16; 848 break; 849 default: 850 llvm_unreachable("Unhandled type code!"); 851 } 852 } 853 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 854 855 Bitwidth = Quad ? 128 : 64; 856 } 857 858 void Type::applyModifiers(StringRef Mods) { 859 bool AppliedQuad = false; 860 applyTypespec(AppliedQuad); 861 862 for (char Mod : Mods) { 863 switch (Mod) { 864 case '.': 865 break; 866 case 'v': 867 Kind = Void; 868 break; 869 case 'S': 870 Kind = SInt; 871 break; 872 case 'U': 873 Kind = UInt; 874 break; 875 case 'B': 876 Kind = BFloat16; 877 ElementBitwidth = 16; 878 break; 879 case 'F': 880 Kind = Float; 881 break; 882 case 'P': 883 Kind = Poly; 884 break; 885 case '>': 886 assert(ElementBitwidth < 128); 887 ElementBitwidth *= 2; 888 break; 889 case '<': 890 assert(ElementBitwidth > 8); 891 ElementBitwidth /= 2; 892 break; 893 case '1': 894 NumVectors = 0; 895 break; 896 case '2': 897 NumVectors = 2; 898 break; 899 case '3': 900 NumVectors = 3; 901 break; 902 case '4': 903 NumVectors = 4; 904 break; 905 case '*': 906 Pointer = true; 907 break; 908 case 'c': 909 Constant = true; 910 break; 911 case 'Q': 912 Bitwidth = 128; 913 break; 914 case 'q': 915 Bitwidth = 64; 916 break; 917 case 'I': 918 Kind = SInt; 919 ElementBitwidth = Bitwidth = 32; 920 NumVectors = 0; 921 Immediate = true; 922 break; 923 case 'p': 924 if (isPoly()) 925 Kind = UInt; 926 break; 927 case '!': 928 // Key type, handled elsewhere. 
929 break; 930 default: 931 llvm_unreachable("Unhandled character!"); 932 } 933 } 934 } 935 936 //===----------------------------------------------------------------------===// 937 // Intrinsic implementation 938 //===----------------------------------------------------------------------===// 939 940 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 941 if (Proto.size() == Pos) 942 return StringRef(); 943 else if (Proto[Pos] != '(') 944 return Proto.substr(Pos++, 1); 945 946 size_t Start = Pos + 1; 947 size_t End = Proto.find(')', Start); 948 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 949 Pos = End + 1; 950 return Proto.slice(Start, End); 951 } 952 953 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 954 char typeCode = '\0'; 955 bool printNumber = true; 956 957 if (CK == ClassB) 958 return ""; 959 960 if (T.isBFloat16()) 961 return "bf16"; 962 963 if (T.isPoly()) 964 typeCode = 'p'; 965 else if (T.isInteger()) 966 typeCode = T.isSigned() ? 's' : 'u'; 967 else 968 typeCode = 'f'; 969 970 if (CK == ClassI) { 971 switch (typeCode) { 972 default: 973 break; 974 case 's': 975 case 'u': 976 case 'p': 977 typeCode = 'i'; 978 break; 979 } 980 } 981 if (CK == ClassB) { 982 typeCode = '\0'; 983 } 984 985 std::string S; 986 if (typeCode != '\0') 987 S.push_back(typeCode); 988 if (printNumber) 989 S += utostr(T.getElementSizeInBits()); 990 991 return S; 992 } 993 994 std::string Intrinsic::getBuiltinTypeStr() { 995 ClassKind LocalCK = getClassKind(true); 996 std::string S; 997 998 Type RetT = getReturnType(); 999 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1000 !RetT.isFloating() && !RetT.isBFloat16()) 1001 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1002 1003 // Since the return value must be one type, return a vector type of the 1004 // appropriate width which we will bitcast. 
An exception is made for 1005 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1006 // fashion, storing them to a pointer arg. 1007 if (RetT.getNumVectors() > 1) { 1008 S += "vv*"; // void result with void* first argument 1009 } else { 1010 if (RetT.isPoly()) 1011 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1012 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1013 RetT.makeSigned(); 1014 1015 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1016 // Cast to vector of 8-bit elements. 1017 RetT.makeInteger(8, true); 1018 1019 S += RetT.builtin_str(); 1020 } 1021 1022 for (unsigned I = 0; I < getNumParams(); ++I) { 1023 Type T = getParamType(I); 1024 if (T.isPoly()) 1025 T.makeInteger(T.getElementSizeInBits(), false); 1026 1027 if (LocalCK == ClassB && !T.isScalar()) 1028 T.makeInteger(8, true); 1029 // Halves always get converted to 8-bit elements. 1030 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1031 T.makeInteger(8, true); 1032 1033 if (LocalCK == ClassI && T.isInteger()) 1034 T.makeSigned(); 1035 1036 if (hasImmediate() && getImmediateIdx() == I) 1037 T.makeImmediate(32); 1038 1039 S += T.builtin_str(); 1040 } 1041 1042 // Extra constant integer to hold type class enum for this function, e.g. s8 1043 if (LocalCK == ClassB) 1044 S += "i"; 1045 1046 return S; 1047 } 1048 1049 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1050 // Check if the prototype has a scalar operand with the type of the vector 1051 // elements. If not, bitcasting the args will take care of arg checking. 1052 // The actual signedness etc. will be taken care of with special enums. 1053 ClassKind LocalCK = CK; 1054 if (!protoHasScalar()) 1055 LocalCK = ClassB; 1056 1057 return mangleName(Name, ForceClassS ? 
ClassS : LocalCK); 1058 } 1059 1060 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1061 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1062 std::string S = Name; 1063 1064 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1065 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1066 Name == "vcvt_f32_bf16") 1067 return Name; 1068 1069 if (!typeCode.empty()) { 1070 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1071 if (Name.size() >= 3 && isdigit(Name.back()) && 1072 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1073 S.insert(S.length() - 3, "_" + typeCode); 1074 else 1075 S += "_" + typeCode; 1076 } 1077 1078 if (BaseType != InBaseType) { 1079 // A reinterpret - out the input base type at the end. 1080 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1081 } 1082 1083 if (LocalCK == ClassB) 1084 S += "_v"; 1085 1086 // Insert a 'q' before the first '_' character so that it ends up before 1087 // _lane or _n on vector-scalar operations. 
1088 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1089 size_t Pos = S.find('_'); 1090 S.insert(Pos, "q"); 1091 } 1092 1093 char Suffix = '\0'; 1094 if (BaseType.isScalarForMangling()) { 1095 switch (BaseType.getElementSizeInBits()) { 1096 case 8: Suffix = 'b'; break; 1097 case 16: Suffix = 'h'; break; 1098 case 32: Suffix = 's'; break; 1099 case 64: Suffix = 'd'; break; 1100 default: llvm_unreachable("Bad suffix!"); 1101 } 1102 } 1103 if (Suffix != '\0') { 1104 size_t Pos = S.find('_'); 1105 S.insert(Pos, &Suffix, 1); 1106 } 1107 1108 return S; 1109 } 1110 1111 std::string Intrinsic::replaceParamsIn(std::string S) { 1112 while (S.find('$') != std::string::npos) { 1113 size_t Pos = S.find('$'); 1114 size_t End = Pos + 1; 1115 while (isalpha(S[End])) 1116 ++End; 1117 1118 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1119 assert_with_loc(Variables.find(VarName) != Variables.end(), 1120 "Variable not defined!"); 1121 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1122 } 1123 1124 return S; 1125 } 1126 1127 void Intrinsic::initVariables() { 1128 Variables.clear(); 1129 1130 // Modify the TypeSpec per-argument to get a concrete Type, and create 1131 // known variables for each. 
1132 for (unsigned I = 1; I < Types.size(); ++I) { 1133 char NameC = '0' + (I - 1); 1134 std::string Name = "p"; 1135 Name.push_back(NameC); 1136 1137 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1138 } 1139 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1140 } 1141 1142 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1143 if (UseMacro) 1144 OS << "#define "; 1145 else 1146 OS << "__ai " << Types[0].str() << " "; 1147 1148 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1149 1150 for (unsigned I = 0; I < getNumParams(); ++I) { 1151 if (I != 0) 1152 OS << ", "; 1153 1154 char NameC = '0' + I; 1155 std::string Name = "p"; 1156 Name.push_back(NameC); 1157 assert(Variables.find(Name) != Variables.end()); 1158 Variable &V = Variables[Name]; 1159 1160 if (!UseMacro) 1161 OS << V.getType().str() << " "; 1162 OS << V.getName(); 1163 } 1164 1165 OS << ")"; 1166 } 1167 1168 void Intrinsic::emitOpeningBrace() { 1169 if (UseMacro) 1170 OS << " __extension__ ({"; 1171 else 1172 OS << " {"; 1173 emitNewLine(); 1174 } 1175 1176 void Intrinsic::emitClosingBrace() { 1177 if (UseMacro) 1178 OS << "})"; 1179 else 1180 OS << "}"; 1181 } 1182 1183 void Intrinsic::emitNewLine() { 1184 if (UseMacro) 1185 OS << " \\\n"; 1186 else 1187 OS << "\n"; 1188 } 1189 1190 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1191 if (Dest.getType().getNumVectors() > 1) { 1192 emitNewLine(); 1193 1194 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1195 OS << " " << Dest.getName() << ".val[" << K << "] = " 1196 << "__builtin_shufflevector(" 1197 << Src.getName() << ".val[" << K << "], " 1198 << Src.getName() << ".val[" << K << "]"; 1199 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1200 OS << ", " << J; 1201 OS << ");"; 1202 emitNewLine(); 1203 } 1204 } else { 1205 OS << " " << Dest.getName() 1206 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1207 for (int J = 
Dest.getType().getNumElements() - 1; J >= 0; --J) 1208 OS << ", " << J; 1209 OS << ");"; 1210 emitNewLine(); 1211 } 1212 } 1213 1214 void Intrinsic::emitArgumentReversal() { 1215 if (isBigEndianSafe()) 1216 return; 1217 1218 // Reverse all vector arguments. 1219 for (unsigned I = 0; I < getNumParams(); ++I) { 1220 std::string Name = "p" + utostr(I); 1221 std::string NewName = "rev" + utostr(I); 1222 1223 Variable &V = Variables[Name]; 1224 Variable NewV(V.getType(), NewName + VariablePostfix); 1225 1226 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1227 continue; 1228 1229 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1230 emitReverseVariable(NewV, V); 1231 V = NewV; 1232 } 1233 } 1234 1235 void Intrinsic::emitReturnReversal() { 1236 if (isBigEndianSafe()) 1237 return; 1238 if (!getReturnType().isVector() || getReturnType().isVoid() || 1239 getReturnType().getNumElements() == 1) 1240 return; 1241 emitReverseVariable(RetVar, RetVar); 1242 } 1243 1244 void Intrinsic::emitShadowedArgs() { 1245 // Macro arguments are not type-checked like inline function arguments, 1246 // so assign them to local temporaries to get the right type checking. 1247 if (!UseMacro) 1248 return; 1249 1250 for (unsigned I = 0; I < getNumParams(); ++I) { 1251 // Do not create a temporary for an immediate argument. 1252 // That would defeat the whole point of using a macro! 1253 if (getParamType(I).isImmediate()) 1254 continue; 1255 // Do not create a temporary for pointer arguments. The input 1256 // pointer may have an alignment hint. 
1257 if (getParamType(I).isPointer()) 1258 continue; 1259 1260 std::string Name = "p" + utostr(I); 1261 1262 assert(Variables.find(Name) != Variables.end()); 1263 Variable &V = Variables[Name]; 1264 1265 std::string NewName = "s" + utostr(I); 1266 Variable V2(V.getType(), NewName + VariablePostfix); 1267 1268 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1269 << V.getName() << ";"; 1270 emitNewLine(); 1271 1272 V = V2; 1273 } 1274 } 1275 1276 bool Intrinsic::protoHasScalar() const { 1277 return std::any_of(Types.begin(), Types.end(), [](const Type &T) { 1278 return T.isScalar() && !T.isImmediate(); 1279 }); 1280 } 1281 1282 void Intrinsic::emitBodyAsBuiltinCall() { 1283 std::string S; 1284 1285 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1286 // sret-like argument. 1287 bool SRet = getReturnType().getNumVectors() >= 2; 1288 1289 StringRef N = Name; 1290 ClassKind LocalCK = CK; 1291 if (!protoHasScalar()) 1292 LocalCK = ClassB; 1293 1294 if (!getReturnType().isVoid() && !SRet) 1295 S += "(" + RetVar.getType().str() + ") "; 1296 1297 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1298 1299 if (SRet) 1300 S += "&" + RetVar.getName() + ", "; 1301 1302 for (unsigned I = 0; I < getNumParams(); ++I) { 1303 Variable &V = Variables["p" + utostr(I)]; 1304 Type T = V.getType(); 1305 1306 // Handle multiple-vector values specially, emitting each subvector as an 1307 // argument to the builtin. 1308 if (T.getNumVectors() > 1) { 1309 // Check if an explicit cast is needed. 1310 std::string Cast; 1311 if (LocalCK == ClassB) { 1312 Type T2 = T; 1313 T2.makeOneVector(); 1314 T2.makeInteger(8, /*Signed=*/true); 1315 Cast = "(" + T2.str() + ")"; 1316 } 1317 1318 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1319 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1320 continue; 1321 } 1322 1323 std::string Arg = V.getName(); 1324 Type CastToType = T; 1325 1326 // Check if an explicit cast is needed. 
1327 if (CastToType.isVector() && 1328 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1329 CastToType.makeInteger(8, true); 1330 Arg = "(" + CastToType.str() + ")" + Arg; 1331 } else if (CastToType.isVector() && LocalCK == ClassI) { 1332 if (CastToType.isInteger()) 1333 CastToType.makeSigned(); 1334 Arg = "(" + CastToType.str() + ")" + Arg; 1335 } 1336 1337 S += Arg + ", "; 1338 } 1339 1340 // Extra constant integer to hold type class enum for this function, e.g. s8 1341 if (getClassKind(true) == ClassB) { 1342 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1343 } else { 1344 // Remove extraneous ", ". 1345 S.pop_back(); 1346 S.pop_back(); 1347 } 1348 S += ");"; 1349 1350 std::string RetExpr; 1351 if (!SRet && !RetVar.getType().isVoid()) 1352 RetExpr = RetVar.getName() + " = "; 1353 1354 OS << " " << RetExpr << S; 1355 emitNewLine(); 1356 } 1357 1358 void Intrinsic::emitBody(StringRef CallPrefix) { 1359 std::vector<std::string> Lines; 1360 1361 assert(RetVar.getType() == Types[0]); 1362 // Create a return variable, if we're not void. 1363 if (!RetVar.getType().isVoid()) { 1364 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1365 emitNewLine(); 1366 } 1367 1368 if (!Body || Body->getValues().empty()) { 1369 // Nothing specific to output - must output a builtin. 1370 emitBodyAsBuiltinCall(); 1371 return; 1372 } 1373 1374 // We have a list of "things to output". The last should be returned. 
1375 for (auto *I : Body->getValues()) { 1376 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1377 Lines.push_back(replaceParamsIn(SI->getAsString())); 1378 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1379 DagEmitter DE(*this, CallPrefix); 1380 Lines.push_back(DE.emitDag(DI).second + ";"); 1381 } 1382 } 1383 1384 assert(!Lines.empty() && "Empty def?"); 1385 if (!RetVar.getType().isVoid()) 1386 Lines.back().insert(0, RetVar.getName() + " = "); 1387 1388 for (auto &L : Lines) { 1389 OS << " " << L; 1390 emitNewLine(); 1391 } 1392 } 1393 1394 void Intrinsic::emitReturn() { 1395 if (RetVar.getType().isVoid()) 1396 return; 1397 if (UseMacro) 1398 OS << " " << RetVar.getName() << ";"; 1399 else 1400 OS << " return " << RetVar.getName() << ";"; 1401 emitNewLine(); 1402 } 1403 1404 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1405 // At this point we should only be seeing a def. 1406 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1407 std::string Op = DefI->getAsString(); 1408 1409 if (Op == "cast" || Op == "bitcast") 1410 return emitDagCast(DI, Op == "bitcast"); 1411 if (Op == "shuffle") 1412 return emitDagShuffle(DI); 1413 if (Op == "dup") 1414 return emitDagDup(DI); 1415 if (Op == "dup_typed") 1416 return emitDagDupTyped(DI); 1417 if (Op == "splat") 1418 return emitDagSplat(DI); 1419 if (Op == "save_temp") 1420 return emitDagSaveTemp(DI); 1421 if (Op == "op") 1422 return emitDagOp(DI); 1423 if (Op == "call" || Op == "call_mangled") 1424 return emitDagCall(DI, Op == "call_mangled"); 1425 if (Op == "name_replace") 1426 return emitDagNameReplace(DI); 1427 if (Op == "literal") 1428 return emitDagLiteral(DI); 1429 assert_with_loc(false, "Unknown operation!"); 1430 return std::make_pair(Type::getVoid(), ""); 1431 } 1432 1433 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1434 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1435 if (DI->getNumArgs() == 2) { 1436 // Unary op. 
1437 std::pair<Type, std::string> R = 1438 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1439 return std::make_pair(R.first, Op + R.second); 1440 } else { 1441 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1442 std::pair<Type, std::string> R1 = 1443 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1444 std::pair<Type, std::string> R2 = 1445 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1446 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1447 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1448 } 1449 } 1450 1451 std::pair<Type, std::string> 1452 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1453 std::vector<Type> Types; 1454 std::vector<std::string> Values; 1455 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1456 std::pair<Type, std::string> R = 1457 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1458 Types.push_back(R.first); 1459 Values.push_back(R.second); 1460 } 1461 1462 // Look up the called intrinsic. 1463 std::string N; 1464 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1465 N = SI->getAsUnquotedString(); 1466 else 1467 N = emitDagArg(DI->getArg(0), "").second; 1468 Optional<std::string> MangledName; 1469 if (MatchMangledName) { 1470 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1471 N += "q"; 1472 MangledName = Intr.mangleName(N, ClassS); 1473 } 1474 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1475 1476 // Make sure the callee is known as an early def. 1477 Callee.setNeededEarly(); 1478 Intr.Dependencies.insert(&Callee); 1479 1480 // Now create the call itself. 
1481 std::string S = ""; 1482 if (!Callee.isBigEndianSafe()) 1483 S += CallPrefix.str(); 1484 S += Callee.getMangledName(true) + "("; 1485 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1486 if (I != 0) 1487 S += ", "; 1488 S += Values[I]; 1489 } 1490 S += ")"; 1491 1492 return std::make_pair(Callee.getReturnType(), S); 1493 } 1494 1495 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1496 bool IsBitCast){ 1497 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1498 std::pair<Type, std::string> R = 1499 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1500 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1501 Type castToType = R.first; 1502 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1503 1504 // MOD can take several forms: 1505 // 1. $X - take the type of parameter / variable X. 1506 // 2. The value "R" - take the type of the return type. 1507 // 3. a type string 1508 // 4. The value "U" or "S" to switch the signedness. 1509 // 5. The value "H" or "D" to half or double the bitwidth. 1510 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1511 if (!DI->getArgNameStr(ArgIdx).empty()) { 1512 assert_with_loc(Intr.Variables.find(std::string( 1513 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1514 "Variable not found"); 1515 castToType = 1516 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1517 } else { 1518 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1519 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1520 1521 if (SI->getAsUnquotedString() == "R") { 1522 castToType = Intr.getReturnType(); 1523 } else if (SI->getAsUnquotedString() == "U") { 1524 castToType.makeUnsigned(); 1525 } else if (SI->getAsUnquotedString() == "S") { 1526 castToType.makeSigned(); 1527 } else if (SI->getAsUnquotedString() == "H") { 1528 castToType.halveLanes(); 1529 } else if (SI->getAsUnquotedString() == "D") { 1530 castToType.doubleLanes(); 1531 } else if (SI->getAsUnquotedString() == "8") { 1532 castToType.makeInteger(8, true); 1533 } else if (SI->getAsUnquotedString() == "32") { 1534 castToType.make32BitElement(); 1535 } else { 1536 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1537 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1538 } 1539 } 1540 } 1541 1542 std::string S; 1543 if (IsBitCast) { 1544 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1545 // a temporary. 1546 std::string N = "reint"; 1547 unsigned I = 0; 1548 while (Intr.Variables.find(N) != Intr.Variables.end()) 1549 N = "reint" + utostr(++I); 1550 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1551 1552 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1553 << R.second << ";"; 1554 Intr.emitNewLine(); 1555 1556 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1557 } else { 1558 // Emit a normal (static) cast. 
1559 S = "(" + castToType.str() + ")(" + R.second + ")"; 1560 } 1561 1562 return std::make_pair(castToType, S); 1563 } 1564 1565 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1566 // See the documentation in arm_neon.td for a description of these operators. 1567 class LowHalf : public SetTheory::Operator { 1568 public: 1569 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1570 ArrayRef<SMLoc> Loc) override { 1571 SetTheory::RecSet Elts2; 1572 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1573 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1574 } 1575 }; 1576 1577 class HighHalf : public SetTheory::Operator { 1578 public: 1579 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1580 ArrayRef<SMLoc> Loc) override { 1581 SetTheory::RecSet Elts2; 1582 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1583 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1584 } 1585 }; 1586 1587 class Rev : public SetTheory::Operator { 1588 unsigned ElementSize; 1589 1590 public: 1591 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1592 1593 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1594 ArrayRef<SMLoc> Loc) override { 1595 SetTheory::RecSet Elts2; 1596 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1597 1598 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1599 VectorSize /= ElementSize; 1600 1601 std::vector<Record *> Revved; 1602 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1603 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1604 Revved.push_back(Elts2[VI + LI]); 1605 } 1606 } 1607 1608 Elts.insert(Revved.begin(), Revved.end()); 1609 } 1610 }; 1611 1612 class MaskExpander : public SetTheory::Expander { 1613 unsigned N; 1614 1615 public: 1616 MaskExpander(unsigned N) : N(N) {} 1617 1618 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1619 unsigned Addend = 0; 
1620 if (R->getName() == "mask0") 1621 Addend = 0; 1622 else if (R->getName() == "mask1") 1623 Addend = N; 1624 else 1625 return; 1626 for (unsigned I = 0; I < N; ++I) 1627 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1628 } 1629 }; 1630 1631 // (shuffle arg1, arg2, sequence) 1632 std::pair<Type, std::string> Arg1 = 1633 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1634 std::pair<Type, std::string> Arg2 = 1635 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1636 assert_with_loc(Arg1.first == Arg2.first, 1637 "Different types in arguments to shuffle!"); 1638 1639 SetTheory ST; 1640 SetTheory::RecSet Elts; 1641 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1642 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1643 ST.addOperator("rev", 1644 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1645 ST.addExpander("MaskExpand", 1646 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1647 ST.evaluate(DI->getArg(2), Elts, None); 1648 1649 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1650 for (auto &E : Elts) { 1651 StringRef Name = E->getName(); 1652 assert_with_loc(Name.startswith("sv"), 1653 "Incorrect element kind in shuffle mask!"); 1654 S += ", " + Name.drop_front(2).str(); 1655 } 1656 S += ")"; 1657 1658 // Recalculate the return type - the shuffle may have halved or doubled it. 
1659 Type T(Arg1.first); 1660 if (Elts.size() > T.getNumElements()) { 1661 assert_with_loc( 1662 Elts.size() == T.getNumElements() * 2, 1663 "Can only double or half the number of elements in a shuffle!"); 1664 T.doubleLanes(); 1665 } else if (Elts.size() < T.getNumElements()) { 1666 assert_with_loc( 1667 Elts.size() == T.getNumElements() / 2, 1668 "Can only double or half the number of elements in a shuffle!"); 1669 T.halveLanes(); 1670 } 1671 1672 return std::make_pair(T, S); 1673 } 1674 1675 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1676 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1677 std::pair<Type, std::string> A = 1678 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1679 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1680 1681 Type T = Intr.getBaseType(); 1682 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1683 std::string S = "(" + T.str() + ") {"; 1684 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1685 if (I != 0) 1686 S += ", "; 1687 S += A.second; 1688 } 1689 S += "}"; 1690 1691 return std::make_pair(T, S); 1692 } 1693 1694 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1695 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1696 std::pair<Type, std::string> A = 1697 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1698 std::pair<Type, std::string> B = 1699 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1700 assert_with_loc(B.first.isScalar(), 1701 "dup_typed() requires a scalar as the second argument"); 1702 1703 Type T = A.first; 1704 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1705 std::string S = "(" + T.str() + ") {"; 1706 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1707 if (I != 0) 1708 S += ", "; 1709 S += B.second; 1710 } 1711 S += "}"; 1712 1713 return std::make_pair(T, S); 1714 } 1715 
1716 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1717 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1718 std::pair<Type, std::string> A = 1719 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1720 std::pair<Type, std::string> B = 1721 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1722 1723 assert_with_loc(B.first.isScalar(), 1724 "splat() requires a scalar int as the second argument"); 1725 1726 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1727 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1728 S += ", " + B.second; 1729 } 1730 S += ")"; 1731 1732 return std::make_pair(Intr.getBaseType(), S); 1733 } 1734 1735 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1736 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1737 std::pair<Type, std::string> A = 1738 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1739 1740 assert_with_loc(!A.first.isVoid(), 1741 "Argument to save_temp() must have non-void type!"); 1742 1743 std::string N = std::string(DI->getArgNameStr(0)); 1744 assert_with_loc(!N.empty(), 1745 "save_temp() expects a name as the first argument"); 1746 1747 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1748 "Variable already defined!"); 1749 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1750 1751 std::string S = 1752 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1753 1754 return std::make_pair(Type::getVoid(), S); 1755 } 1756 1757 std::pair<Type, std::string> 1758 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1759 std::string S = Intr.Name; 1760 1761 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1762 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1763 std::string ReplaceWith = 
cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1764 1765 size_t Idx = S.find(ToReplace); 1766 1767 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1768 S.replace(Idx, ToReplace.size(), ReplaceWith); 1769 1770 return std::make_pair(Type::getVoid(), S); 1771 } 1772 1773 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1774 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1775 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1776 return std::make_pair(Type::fromTypedefName(Ty), Value); 1777 } 1778 1779 std::pair<Type, std::string> 1780 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1781 if (!ArgName.empty()) { 1782 assert_with_loc(!Arg->isComplete(), 1783 "Arguments must either be DAGs or names, not both!"); 1784 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1785 "Variable not defined!"); 1786 Variable &V = Intr.Variables[ArgName]; 1787 return std::make_pair(V.getType(), V.getName()); 1788 } 1789 1790 assert(Arg && "Neither ArgName nor Arg?!"); 1791 DagInit *DI = dyn_cast<DagInit>(Arg); 1792 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1793 1794 return emitDag(DI); 1795 } 1796 1797 std::string Intrinsic::generate() { 1798 // Avoid duplicated code for big and little endian 1799 if (isBigEndianSafe()) { 1800 generateImpl(false, "", ""); 1801 return OS.str(); 1802 } 1803 // Little endian intrinsics are simple and don't require any argument 1804 // swapping. 1805 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1806 1807 generateImpl(false, "", ""); 1808 1809 OS << "#else\n"; 1810 1811 // Big endian intrinsics are more complex. The user intended these 1812 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1813 // but we load as-if (V)LD1. So we should swap all arguments and 1814 // swap the return value too. 
1815 // 1816 // If we call sub-intrinsics, we should call a version that does 1817 // not re-swap the arguments! 1818 generateImpl(true, "", "__noswap_"); 1819 1820 // If we're needed early, create a non-swapping variant for 1821 // big-endian. 1822 if (NeededEarly) { 1823 generateImpl(false, "__noswap_", "__noswap_"); 1824 } 1825 OS << "#endif\n\n"; 1826 1827 return OS.str(); 1828 } 1829 1830 void Intrinsic::generateImpl(bool ReverseArguments, 1831 StringRef NamePrefix, StringRef CallPrefix) { 1832 CurrentRecord = R; 1833 1834 // If we call a macro, our local variables may be corrupted due to 1835 // lack of proper lexical scoping. So, add a globally unique postfix 1836 // to every variable. 1837 // 1838 // indexBody() should have set up the Dependencies set by now. 1839 for (auto *I : Dependencies) 1840 if (I->UseMacro) { 1841 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1842 break; 1843 } 1844 1845 initVariables(); 1846 1847 emitPrototype(NamePrefix); 1848 1849 if (IsUnavailable) { 1850 OS << " __attribute__((unavailable));"; 1851 } else { 1852 emitOpeningBrace(); 1853 emitShadowedArgs(); 1854 if (ReverseArguments) 1855 emitArgumentReversal(); 1856 emitBody(CallPrefix); 1857 if (ReverseArguments) 1858 emitReturnReversal(); 1859 emitReturn(); 1860 emitClosingBrace(); 1861 } 1862 OS << "\n"; 1863 1864 CurrentRecord = nullptr; 1865 } 1866 1867 void Intrinsic::indexBody() { 1868 CurrentRecord = R; 1869 1870 initVariables(); 1871 emitBody(""); 1872 OS.str(""); 1873 1874 CurrentRecord = nullptr; 1875 } 1876 1877 //===----------------------------------------------------------------------===// 1878 // NeonEmitter implementation 1879 //===----------------------------------------------------------------------===// 1880 1881 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1882 Optional<std::string> MangledName) { 1883 // First, look up the name in the intrinsic map. 
1884 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1885 ("Intrinsic '" + Name + "' not found!").str()); 1886 auto &V = IntrinsicMap.find(Name.str())->second; 1887 std::vector<Intrinsic *> GoodVec; 1888 1889 // Create a string to print if we end up failing. 1890 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1891 for (unsigned I = 0; I < Types.size(); ++I) { 1892 if (I != 0) 1893 ErrMsg += ", "; 1894 ErrMsg += Types[I].str(); 1895 } 1896 ErrMsg += ")'\n"; 1897 ErrMsg += "Available overloads:\n"; 1898 1899 // Now, look through each intrinsic implementation and see if the types are 1900 // compatible. 1901 for (auto &I : V) { 1902 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1903 ErrMsg += "("; 1904 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1905 if (A != 0) 1906 ErrMsg += ", "; 1907 ErrMsg += I.getParamType(A).str(); 1908 } 1909 ErrMsg += ")\n"; 1910 1911 if (MangledName && MangledName != I.getMangledName(true)) 1912 continue; 1913 1914 if (I.getNumParams() != Types.size()) 1915 continue; 1916 1917 unsigned ArgNum = 0; 1918 bool MatchingArgumentTypes = 1919 std::all_of(Types.begin(), Types.end(), [&](const auto &Type) { 1920 return Type == I.getParamType(ArgNum++); 1921 }); 1922 1923 if (MatchingArgumentTypes) 1924 GoodVec.push_back(&I); 1925 } 1926 1927 assert_with_loc(!GoodVec.empty(), 1928 "No compatible intrinsic found - " + ErrMsg); 1929 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1930 1931 return *GoodVec.front(); 1932 } 1933 1934 void NeonEmitter::createIntrinsic(Record *R, 1935 SmallVectorImpl<Intrinsic *> &Out) { 1936 std::string Name = std::string(R->getValueAsString("Name")); 1937 std::string Proto = std::string(R->getValueAsString("Prototype")); 1938 std::string Types = std::string(R->getValueAsString("Types")); 1939 Record *OperationRec = R->getValueAsDef("Operation"); 1940 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1941 std::string 
Guard = std::string(R->getValueAsString("ArchGuard")); 1942 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1943 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 1944 1945 // Set the global current record. This allows assert_with_loc to produce 1946 // decent location information even when highly nested. 1947 CurrentRecord = R; 1948 1949 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1950 1951 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1952 1953 ClassKind CK = ClassNone; 1954 if (R->getSuperClasses().size() >= 2) 1955 CK = ClassMap[R->getSuperClasses()[1].first]; 1956 1957 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1958 if (!CartesianProductWith.empty()) { 1959 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 1960 for (auto TS : TypeSpecs) { 1961 Type DefaultT(TS, "."); 1962 for (auto SrcTS : ProductTypeSpecs) { 1963 Type DefaultSrcT(SrcTS, "."); 1964 if (TS == SrcTS || 1965 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1966 continue; 1967 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1968 } 1969 } 1970 } else { 1971 for (auto TS : TypeSpecs) { 1972 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1973 } 1974 } 1975 1976 llvm::sort(NewTypeSpecs); 1977 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1978 NewTypeSpecs.end()); 1979 auto &Entry = IntrinsicMap[Name]; 1980 1981 for (auto &I : NewTypeSpecs) { 1982 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 1983 Guard, IsUnavailable, BigEndianSafe); 1984 Out.push_back(&Entry.back()); 1985 } 1986 1987 CurrentRecord = nullptr; 1988 } 1989 1990 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1991 /// declaration of builtins, checking for unique builtin declarations. 
1992 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1993 SmallVectorImpl<Intrinsic *> &Defs) { 1994 OS << "#ifdef GET_NEON_BUILTINS\n"; 1995 1996 // We only want to emit a builtin once, and we want to emit them in 1997 // alphabetical order, so use a std::set. 1998 std::set<std::string> Builtins; 1999 2000 for (auto *Def : Defs) { 2001 if (Def->hasBody()) 2002 continue; 2003 2004 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 2005 2006 S += Def->getBuiltinTypeStr(); 2007 S += "\", \"n\")"; 2008 2009 Builtins.insert(S); 2010 } 2011 2012 for (auto &S : Builtins) 2013 OS << S << "\n"; 2014 OS << "#endif\n\n"; 2015 } 2016 2017 /// Generate the ARM and AArch64 overloaded type checking code for 2018 /// SemaChecking.cpp, checking for unique builtin declarations. 2019 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2020 SmallVectorImpl<Intrinsic *> &Defs) { 2021 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2022 2023 // We record each overload check line before emitting because subsequent Inst 2024 // definitions may extend the number of permitted types (i.e. augment the 2025 // Mask). Use std::map to avoid sorting the table by hash number. 2026 struct OverloadInfo { 2027 uint64_t Mask; 2028 int PtrArgNum; 2029 bool HasConstPtr; 2030 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 2031 }; 2032 std::map<std::string, OverloadInfo> OverloadMap; 2033 2034 for (auto *Def : Defs) { 2035 // If the def has a body (that is, it has Operation DAGs), it won't call 2036 // __builtin_neon_* so we don't need to generate a definition for it. 2037 if (Def->hasBody()) 2038 continue; 2039 // Functions which have a scalar argument cannot be overloaded, no need to 2040 // check them if we are emitting the type checking code. 
2041 if (Def->protoHasScalar()) 2042 continue; 2043 2044 uint64_t Mask = 0ULL; 2045 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2046 2047 // Check if the function has a pointer or const pointer argument. 2048 int PtrArgNum = -1; 2049 bool HasConstPtr = false; 2050 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2051 const auto &Type = Def->getParamType(I); 2052 if (Type.isPointer()) { 2053 PtrArgNum = I; 2054 HasConstPtr = Type.isConstPointer(); 2055 } 2056 } 2057 2058 // For sret builtins, adjust the pointer argument index. 2059 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2060 PtrArgNum += 1; 2061 2062 std::string Name = Def->getName(); 2063 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2064 // and vst1_lane intrinsics. Using a pointer to the vector element 2065 // type with one of those operations causes codegen to select an aligned 2066 // load/store instruction. If you want an unaligned operation, 2067 // the pointer argument needs to have less alignment than element type, 2068 // so just accept any pointer type. 
2069 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2070 PtrArgNum = -1; 2071 HasConstPtr = false; 2072 } 2073 2074 if (Mask) { 2075 std::string Name = Def->getMangledName(); 2076 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2077 OverloadInfo &OI = OverloadMap[Name]; 2078 OI.Mask |= Mask; 2079 OI.PtrArgNum |= PtrArgNum; 2080 OI.HasConstPtr = HasConstPtr; 2081 } 2082 } 2083 2084 for (auto &I : OverloadMap) { 2085 OverloadInfo &OI = I.second; 2086 2087 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2088 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2089 if (OI.PtrArgNum >= 0) 2090 OS << "; PtrArgNum = " << OI.PtrArgNum; 2091 if (OI.HasConstPtr) 2092 OS << "; HasConstPtr = true"; 2093 OS << "; break;\n"; 2094 } 2095 OS << "#endif\n\n"; 2096 } 2097 2098 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2099 SmallVectorImpl<Intrinsic *> &Defs) { 2100 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2101 2102 std::set<std::string> Emitted; 2103 2104 for (auto *Def : Defs) { 2105 if (Def->hasBody()) 2106 continue; 2107 // Functions which do not have an immediate do not need to have range 2108 // checking code emitted. 2109 if (!Def->hasImmediate()) 2110 continue; 2111 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2112 continue; 2113 2114 std::string LowerBound, UpperBound; 2115 2116 Record *R = Def->getRecord(); 2117 if (R->getValueAsBit("isVCVT_N")) { 2118 // VCVT between floating- and fixed-point values takes an immediate 2119 // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16. 
2120 LowerBound = "1"; 2121 if (Def->getBaseType().getElementSizeInBits() == 16 || 2122 Def->getName().find('h') != std::string::npos) 2123 // VCVTh operating on FP16 intrinsics in range [1, 16) 2124 UpperBound = "15"; 2125 else if (Def->getBaseType().getElementSizeInBits() == 32) 2126 UpperBound = "31"; 2127 else 2128 UpperBound = "63"; 2129 } else if (R->getValueAsBit("isScalarShift")) { 2130 // Right shifts have an 'r' in the name, left shifts do not. Convert 2131 // instructions have the same bounds and right shifts. 2132 if (Def->getName().find('r') != std::string::npos || 2133 Def->getName().find("cvt") != std::string::npos) 2134 LowerBound = "1"; 2135 2136 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2137 } else if (R->getValueAsBit("isShift")) { 2138 // Builtins which are overloaded by type will need to have their upper 2139 // bound computed at Sema time based on the type constant. 2140 2141 // Right shifts have an 'r' in the name, left shifts do not. 2142 if (Def->getName().find('r') != std::string::npos) 2143 LowerBound = "1"; 2144 UpperBound = "RFT(TV, true)"; 2145 } else if (Def->getClassKind(true) == ClassB) { 2146 // ClassB intrinsics have a type (and hence lane number) that is only 2147 // known at runtime. 2148 if (R->getValueAsBit("isLaneQ")) 2149 UpperBound = "RFT(TV, false, true)"; 2150 else 2151 UpperBound = "RFT(TV, false, false)"; 2152 } else { 2153 // The immediate generally refers to a lane in the preceding argument. 2154 assert(Def->getImmediateIdx() > 0); 2155 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2156 UpperBound = utostr(T.getNumElements() - 1); 2157 } 2158 2159 // Calculate the index of the immediate that should be range checked. 
2160 unsigned Idx = Def->getNumParams(); 2161 if (Def->hasImmediate()) 2162 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2163 2164 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2165 << "i = " << Idx << ";"; 2166 if (!LowerBound.empty()) 2167 OS << " l = " << LowerBound << ";"; 2168 if (!UpperBound.empty()) 2169 OS << " u = " << UpperBound << ";"; 2170 OS << " break;\n"; 2171 2172 Emitted.insert(Def->getMangledName()); 2173 } 2174 2175 OS << "#endif\n\n"; 2176 } 2177 2178 /// runHeader - Emit a file with sections defining: 2179 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2180 /// 2. the SemaChecking code for the type overload checking. 2181 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2182 void NeonEmitter::runHeader(raw_ostream &OS) { 2183 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2184 2185 SmallVector<Intrinsic *, 128> Defs; 2186 for (auto *R : RV) 2187 createIntrinsic(R, Defs); 2188 2189 // Generate shared BuiltinsXXX.def 2190 genBuiltinsDef(OS, Defs); 2191 2192 // Generate ARM overloaded type checking code for SemaChecking.cpp 2193 genOverloadTypeCheckCode(OS, Defs); 2194 2195 // Generate ARM range checking code for shift/lane immediates. 2196 genIntrinsicRangeCheckCode(OS, Defs); 2197 } 2198 2199 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2200 std::string TypedefTypes(types); 2201 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2202 2203 // Emit vector typedefs. 
2204 bool InIfdef = false; 2205 for (auto &TS : TDTypeVec) { 2206 bool IsA64 = false; 2207 Type T(TS, "."); 2208 if (T.isDouble()) 2209 IsA64 = true; 2210 2211 if (InIfdef && !IsA64) { 2212 OS << "#endif\n"; 2213 InIfdef = false; 2214 } 2215 if (!InIfdef && IsA64) { 2216 OS << "#ifdef __aarch64__\n"; 2217 InIfdef = true; 2218 } 2219 2220 if (T.isPoly()) 2221 OS << "typedef __attribute__((neon_polyvector_type("; 2222 else 2223 OS << "typedef __attribute__((neon_vector_type("; 2224 2225 Type T2 = T; 2226 T2.makeScalar(); 2227 OS << T.getNumElements() << "))) "; 2228 OS << T2.str(); 2229 OS << " " << T.str() << ";\n"; 2230 } 2231 if (InIfdef) 2232 OS << "#endif\n"; 2233 OS << "\n"; 2234 2235 // Emit struct typedefs. 2236 InIfdef = false; 2237 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2238 for (auto &TS : TDTypeVec) { 2239 bool IsA64 = false; 2240 Type T(TS, "."); 2241 if (T.isDouble()) 2242 IsA64 = true; 2243 2244 if (InIfdef && !IsA64) { 2245 OS << "#endif\n"; 2246 InIfdef = false; 2247 } 2248 if (!InIfdef && IsA64) { 2249 OS << "#ifdef __aarch64__\n"; 2250 InIfdef = true; 2251 } 2252 2253 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2254 Type VT(TS, Mods); 2255 OS << "typedef struct " << VT.str() << " {\n"; 2256 OS << " " << T.str() << " val"; 2257 OS << "[" << NumMembers << "]"; 2258 OS << ";\n} "; 2259 OS << VT.str() << ";\n"; 2260 OS << "\n"; 2261 } 2262 } 2263 if (InIfdef) 2264 OS << "#endif\n"; 2265 } 2266 2267 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2268 /// is comprised of type definitions and function declarations. 
2269 void NeonEmitter::run(raw_ostream &OS) { 2270 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2271 "------------------------------" 2272 "---===\n" 2273 " *\n" 2274 " * Permission is hereby granted, free of charge, to any person " 2275 "obtaining " 2276 "a copy\n" 2277 " * of this software and associated documentation files (the " 2278 "\"Software\")," 2279 " to deal\n" 2280 " * in the Software without restriction, including without limitation " 2281 "the " 2282 "rights\n" 2283 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2284 "and/or sell\n" 2285 " * copies of the Software, and to permit persons to whom the Software " 2286 "is\n" 2287 " * furnished to do so, subject to the following conditions:\n" 2288 " *\n" 2289 " * The above copyright notice and this permission notice shall be " 2290 "included in\n" 2291 " * all copies or substantial portions of the Software.\n" 2292 " *\n" 2293 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2294 "EXPRESS OR\n" 2295 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2296 "MERCHANTABILITY,\n" 2297 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2298 "SHALL THE\n" 2299 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2300 "OTHER\n" 2301 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2302 "ARISING FROM,\n" 2303 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2304 "DEALINGS IN\n" 2305 " * THE SOFTWARE.\n" 2306 " *\n" 2307 " *===-----------------------------------------------------------------" 2308 "---" 2309 "---===\n" 2310 " */\n\n"; 2311 2312 OS << "#ifndef __ARM_NEON_H\n"; 2313 OS << "#define __ARM_NEON_H\n\n"; 2314 2315 OS << "#ifndef __ARM_FP\n"; 2316 OS << "#error \"NEON intrinsics not available with the soft-float ABI. 
" 2317 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2318 OS << "#else\n\n"; 2319 2320 OS << "#if !defined(__ARM_NEON)\n"; 2321 OS << "#error \"NEON support not enabled\"\n"; 2322 OS << "#else\n\n"; 2323 2324 OS << "#include <stdint.h>\n\n"; 2325 2326 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2327 OS << "#include <arm_bf16.h>\n"; 2328 OS << "typedef __bf16 bfloat16_t;\n"; 2329 OS << "#endif\n\n"; 2330 2331 // Emit NEON-specific scalar typedefs. 2332 OS << "typedef float float32_t;\n"; 2333 OS << "typedef __fp16 float16_t;\n"; 2334 2335 OS << "#ifdef __aarch64__\n"; 2336 OS << "typedef double float64_t;\n"; 2337 OS << "#endif\n\n"; 2338 2339 // For now, signedness of polynomial types depends on target 2340 OS << "#ifdef __aarch64__\n"; 2341 OS << "typedef uint8_t poly8_t;\n"; 2342 OS << "typedef uint16_t poly16_t;\n"; 2343 OS << "typedef uint64_t poly64_t;\n"; 2344 OS << "typedef __uint128_t poly128_t;\n"; 2345 OS << "#else\n"; 2346 OS << "typedef int8_t poly8_t;\n"; 2347 OS << "typedef int16_t poly16_t;\n"; 2348 OS << "typedef int64_t poly64_t;\n"; 2349 OS << "#endif\n"; 2350 2351 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); 2352 2353 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2354 emitNeonTypeDefs("bQb", OS); 2355 OS << "#endif\n\n"; 2356 2357 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2358 "__nodebug__))\n\n"; 2359 2360 SmallVector<Intrinsic *, 128> Defs; 2361 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2362 for (auto *R : RV) 2363 createIntrinsic(R, Defs); 2364 2365 for (auto *I : Defs) 2366 I->indexBody(); 2367 2368 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2369 2370 // Only emit a def when its requirements have been met. 2371 // FIXME: This loop could be made faster, but it's fast enough for now. 
2372 bool MadeProgress = true; 2373 std::string InGuard; 2374 while (!Defs.empty() && MadeProgress) { 2375 MadeProgress = false; 2376 2377 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2378 I != Defs.end(); /*No step*/) { 2379 bool DependenciesSatisfied = true; 2380 for (auto *II : (*I)->getDependencies()) { 2381 if (llvm::is_contained(Defs, II)) 2382 DependenciesSatisfied = false; 2383 } 2384 if (!DependenciesSatisfied) { 2385 // Try the next one. 2386 ++I; 2387 continue; 2388 } 2389 2390 // Emit #endif/#if pair if needed. 2391 if ((*I)->getGuard() != InGuard) { 2392 if (!InGuard.empty()) 2393 OS << "#endif\n"; 2394 InGuard = (*I)->getGuard(); 2395 if (!InGuard.empty()) 2396 OS << "#if " << InGuard << "\n"; 2397 } 2398 2399 // Actually generate the intrinsic code. 2400 OS << (*I)->generate(); 2401 2402 MadeProgress = true; 2403 I = Defs.erase(I); 2404 } 2405 } 2406 assert(Defs.empty() && "Some requirements were not satisfied!"); 2407 if (!InGuard.empty()) 2408 OS << "#endif\n"; 2409 2410 OS << "\n"; 2411 OS << "#undef __ai\n\n"; 2412 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2413 OS << "#endif /* ifndef __ARM_FP */\n"; 2414 OS << "#endif /* __ARM_NEON_H */\n"; 2415 } 2416 2417 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2418 /// is comprised of type definitions and function declarations. 
2419 void NeonEmitter::runFP16(raw_ostream &OS) { 2420 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2421 "------------------------------" 2422 "---===\n" 2423 " *\n" 2424 " * Permission is hereby granted, free of charge, to any person " 2425 "obtaining a copy\n" 2426 " * of this software and associated documentation files (the " 2427 "\"Software\"), to deal\n" 2428 " * in the Software without restriction, including without limitation " 2429 "the rights\n" 2430 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2431 "and/or sell\n" 2432 " * copies of the Software, and to permit persons to whom the Software " 2433 "is\n" 2434 " * furnished to do so, subject to the following conditions:\n" 2435 " *\n" 2436 " * The above copyright notice and this permission notice shall be " 2437 "included in\n" 2438 " * all copies or substantial portions of the Software.\n" 2439 " *\n" 2440 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2441 "EXPRESS OR\n" 2442 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2443 "MERCHANTABILITY,\n" 2444 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2445 "SHALL THE\n" 2446 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2447 "OTHER\n" 2448 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2449 "ARISING FROM,\n" 2450 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2451 "DEALINGS IN\n" 2452 " * THE SOFTWARE.\n" 2453 " *\n" 2454 " *===-----------------------------------------------------------------" 2455 "---" 2456 "---===\n" 2457 " */\n\n"; 2458 2459 OS << "#ifndef __ARM_FP16_H\n"; 2460 OS << "#define __ARM_FP16_H\n\n"; 2461 2462 OS << "#include <stdint.h>\n\n"; 2463 2464 OS << "typedef __fp16 float16_t;\n"; 2465 2466 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2467 "__nodebug__))\n\n"; 2468 2469 SmallVector<Intrinsic *, 128> Defs; 2470 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2471 for (auto *R : RV) 2472 createIntrinsic(R, Defs); 2473 2474 for (auto *I : Defs) 2475 I->indexBody(); 2476 2477 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2478 2479 // Only emit a def when its requirements have been met. 2480 // FIXME: This loop could be made faster, but it's fast enough for now. 2481 bool MadeProgress = true; 2482 std::string InGuard; 2483 while (!Defs.empty() && MadeProgress) { 2484 MadeProgress = false; 2485 2486 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2487 I != Defs.end(); /*No step*/) { 2488 bool DependenciesSatisfied = true; 2489 for (auto *II : (*I)->getDependencies()) { 2490 if (llvm::is_contained(Defs, II)) 2491 DependenciesSatisfied = false; 2492 } 2493 if (!DependenciesSatisfied) { 2494 // Try the next one. 2495 ++I; 2496 continue; 2497 } 2498 2499 // Emit #endif/#if pair if needed. 2500 if ((*I)->getGuard() != InGuard) { 2501 if (!InGuard.empty()) 2502 OS << "#endif\n"; 2503 InGuard = (*I)->getGuard(); 2504 if (!InGuard.empty()) 2505 OS << "#if " << InGuard << "\n"; 2506 } 2507 2508 // Actually generate the intrinsic code. 
2509 OS << (*I)->generate(); 2510 2511 MadeProgress = true; 2512 I = Defs.erase(I); 2513 } 2514 } 2515 assert(Defs.empty() && "Some requirements were not satisfied!"); 2516 if (!InGuard.empty()) 2517 OS << "#endif\n"; 2518 2519 OS << "\n"; 2520 OS << "#undef __ai\n\n"; 2521 OS << "#endif /* __ARM_FP16_H */\n"; 2522 } 2523 2524 void NeonEmitter::runBF16(raw_ostream &OS) { 2525 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2526 "-----------------------------------===\n" 2527 " *\n" 2528 " *\n" 2529 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2530 "Exceptions.\n" 2531 " * See https://llvm.org/LICENSE.txt for license information.\n" 2532 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2533 " *\n" 2534 " *===-----------------------------------------------------------------" 2535 "------===\n" 2536 " */\n\n"; 2537 2538 OS << "#ifndef __ARM_BF16_H\n"; 2539 OS << "#define __ARM_BF16_H\n\n"; 2540 2541 OS << "typedef __bf16 bfloat16_t;\n"; 2542 2543 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2544 "__nodebug__))\n\n"; 2545 2546 SmallVector<Intrinsic *, 128> Defs; 2547 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2548 for (auto *R : RV) 2549 createIntrinsic(R, Defs); 2550 2551 for (auto *I : Defs) 2552 I->indexBody(); 2553 2554 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2555 2556 // Only emit a def when its requirements have been met. 2557 // FIXME: This loop could be made faster, but it's fast enough for now. 
2558 bool MadeProgress = true; 2559 std::string InGuard; 2560 while (!Defs.empty() && MadeProgress) { 2561 MadeProgress = false; 2562 2563 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2564 I != Defs.end(); /*No step*/) { 2565 bool DependenciesSatisfied = true; 2566 for (auto *II : (*I)->getDependencies()) { 2567 if (llvm::is_contained(Defs, II)) 2568 DependenciesSatisfied = false; 2569 } 2570 if (!DependenciesSatisfied) { 2571 // Try the next one. 2572 ++I; 2573 continue; 2574 } 2575 2576 // Emit #endif/#if pair if needed. 2577 if ((*I)->getGuard() != InGuard) { 2578 if (!InGuard.empty()) 2579 OS << "#endif\n"; 2580 InGuard = (*I)->getGuard(); 2581 if (!InGuard.empty()) 2582 OS << "#if " << InGuard << "\n"; 2583 } 2584 2585 // Actually generate the intrinsic code. 2586 OS << (*I)->generate(); 2587 2588 MadeProgress = true; 2589 I = Defs.erase(I); 2590 } 2591 } 2592 assert(Defs.empty() && "Some requirements were not satisfied!"); 2593 if (!InGuard.empty()) 2594 OS << "#endif\n"; 2595 2596 OS << "\n"; 2597 OS << "#undef __ai\n\n"; 2598 2599 OS << "#endif\n"; 2600 } 2601 2602 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2603 NeonEmitter(Records).run(OS); 2604 } 2605 2606 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2607 NeonEmitter(Records).runFP16(OS); 2608 } 2609 2610 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { 2611 NeonEmitter(Records).runBF16(OS); 2612 } 2613 2614 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2615 NeonEmitter(Records).runHeader(OS); 2616 } 2617 2618 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2619 llvm_unreachable("Neon test generation no longer implemented!"); 2620 } 2621