//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include "llvm/TableGen/SetTheory.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <utility> 52 #include <vector> 53 54 using namespace llvm; 55 56 namespace { 57 58 // While globals are generally bad, this one allows us to perform assertions 59 // liberally and somehow still trace them back to the def they indirectly 60 // came from. 61 static Record *CurrentRecord = nullptr; 62 static void assert_with_loc(bool Assertion, const std::string &Str) { 63 if (!Assertion) { 64 if (CurrentRecord) 65 PrintFatalError(CurrentRecord->getLoc(), Str); 66 else 67 PrintFatalError(Str); 68 } 69 } 70 71 enum ClassKind { 72 ClassNone, 73 ClassI, // generic integer instruction, e.g., "i8" suffix 74 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 75 ClassW, // width-specific instruction, e.g., "8" suffix 76 ClassB, // bitcast arguments with enum argument to specify type 77 ClassL, // Logical instructions which are op instructions 78 // but we need to not emit any suffix for in our 79 // tests. 80 ClassNoTest // Instructions which we do not test since they are 81 // not TRUE instructions. 82 }; 83 84 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 85 /// builtins. 
These must be kept in sync with the flags in 86 /// include/clang/Basic/TargetBuiltins.h. 87 namespace NeonTypeFlags { 88 89 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 90 91 enum EltType { 92 Int8, 93 Int16, 94 Int32, 95 Int64, 96 Poly8, 97 Poly16, 98 Poly64, 99 Poly128, 100 Float16, 101 Float32, 102 Float64, 103 BFloat16 104 }; 105 106 } // end namespace NeonTypeFlags 107 108 class NeonEmitter; 109 110 //===----------------------------------------------------------------------===// 111 // TypeSpec 112 //===----------------------------------------------------------------------===// 113 114 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 115 /// for strong typing purposes. 116 /// 117 /// A TypeSpec can be used to create a type. 118 class TypeSpec : public std::string { 119 public: 120 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 121 std::vector<TypeSpec> Ret; 122 TypeSpec Acc; 123 for (char I : Str.str()) { 124 if (islower(I)) { 125 Acc.push_back(I); 126 Ret.push_back(TypeSpec(Acc)); 127 Acc.clear(); 128 } else { 129 Acc.push_back(I); 130 } 131 } 132 return Ret; 133 } 134 }; 135 136 //===----------------------------------------------------------------------===// 137 // Type 138 //===----------------------------------------------------------------------===// 139 140 /// A Type. Not much more to say here. 141 class Type { 142 private: 143 TypeSpec TS; 144 145 enum TypeKind { 146 Void, 147 Float, 148 SInt, 149 UInt, 150 Poly, 151 BFloat16, 152 }; 153 TypeKind Kind; 154 bool Immediate, Constant, Pointer; 155 // ScalarForMangling and NoManglingQ are really not suited to live here as 156 // they are not related to the type. But they live in the TypeSpec (not the 157 // prototype), so this is really the only place to store them. 
158 bool ScalarForMangling, NoManglingQ; 159 unsigned Bitwidth, ElementBitwidth, NumVectors; 160 161 public: 162 Type() 163 : Kind(Void), Immediate(false), Constant(false), 164 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 165 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 166 167 Type(TypeSpec TS, StringRef CharMods) 168 : TS(std::move(TS)), Kind(Void), Immediate(false), 169 Constant(false), Pointer(false), ScalarForMangling(false), 170 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 171 applyModifiers(CharMods); 172 } 173 174 /// Returns a type representing "void". 175 static Type getVoid() { return Type(); } 176 177 bool operator==(const Type &Other) const { return str() == Other.str(); } 178 bool operator!=(const Type &Other) const { return !operator==(Other); } 179 180 // 181 // Query functions 182 // 183 bool isScalarForMangling() const { return ScalarForMangling; } 184 bool noManglingQ() const { return NoManglingQ; } 185 186 bool isPointer() const { return Pointer; } 187 bool isValue() const { return !isVoid() && !isPointer(); } 188 bool isScalar() const { return isValue() && NumVectors == 0; } 189 bool isVector() const { return isValue() && NumVectors > 0; } 190 bool isConstPointer() const { return Constant; } 191 bool isFloating() const { return Kind == Float; } 192 bool isInteger() const { return Kind == SInt || Kind == UInt; } 193 bool isPoly() const { return Kind == Poly; } 194 bool isSigned() const { return Kind == SInt; } 195 bool isImmediate() const { return Immediate; } 196 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 197 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 198 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 199 bool isChar() const { return ElementBitwidth == 8; } 200 bool isShort() const { return isInteger() && ElementBitwidth == 16; } 201 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 202 bool isLong() 
const { return isInteger() && ElementBitwidth == 64; } 203 bool isVoid() const { return Kind == Void; } 204 bool isBFloat16() const { return Kind == BFloat16; } 205 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 206 unsigned getSizeInBits() const { return Bitwidth; } 207 unsigned getElementSizeInBits() const { return ElementBitwidth; } 208 unsigned getNumVectors() const { return NumVectors; } 209 210 // 211 // Mutator functions 212 // 213 void makeUnsigned() { 214 assert(!isVoid() && "not a potentially signed type"); 215 Kind = UInt; 216 } 217 void makeSigned() { 218 assert(!isVoid() && "not a potentially signed type"); 219 Kind = SInt; 220 } 221 222 void makeInteger(unsigned ElemWidth, bool Sign) { 223 assert(!isVoid() && "converting void to int probably not useful"); 224 Kind = Sign ? SInt : UInt; 225 Immediate = false; 226 ElementBitwidth = ElemWidth; 227 } 228 229 void makeImmediate(unsigned ElemWidth) { 230 Kind = SInt; 231 Immediate = true; 232 ElementBitwidth = ElemWidth; 233 } 234 235 void makeScalar() { 236 Bitwidth = ElementBitwidth; 237 NumVectors = 0; 238 } 239 240 void makeOneVector() { 241 assert(isVector()); 242 NumVectors = 1; 243 } 244 245 void make32BitElement() { 246 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 247 ElementBitwidth = 32; 248 } 249 250 void doubleLanes() { 251 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 252 Bitwidth = 128; 253 } 254 255 void halveLanes() { 256 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 257 Bitwidth = 64; 258 } 259 260 /// Return the C string representation of a type, which is the typename 261 /// defined in stdint.h or arm_neon.h. 262 std::string str() const; 263 264 /// Return the string representation of a type, which is an encoded 265 /// string for passing to the BUILTIN() macro in Builtins.def. 266 std::string builtin_str() const; 267 268 /// Return the value in NeonTypeFlags for this type. 
269 unsigned getNeonEnum() const; 270 271 /// Parse a type from a stdint.h or arm_neon.h typedef name, 272 /// for example uint32x2_t or int64_t. 273 static Type fromTypedefName(StringRef Name); 274 275 private: 276 /// Creates the type based on the typespec string in TS. 277 /// Sets "Quad" to true if the "Q" or "H" modifiers were 278 /// seen. This is needed by applyModifier as some modifiers 279 /// only take effect if the type size was changed by "Q" or "H". 280 void applyTypespec(bool &Quad); 281 /// Applies prototype modifiers to the type. 282 void applyModifiers(StringRef Mods); 283 }; 284 285 //===----------------------------------------------------------------------===// 286 // Variable 287 //===----------------------------------------------------------------------===// 288 289 /// A variable is a simple class that just has a type and a name. 290 class Variable { 291 Type T; 292 std::string N; 293 294 public: 295 Variable() : T(Type::getVoid()) {} 296 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 297 298 Type getType() const { return T; } 299 std::string getName() const { return "__" + N; } 300 }; 301 302 //===----------------------------------------------------------------------===// 303 // Intrinsic 304 //===----------------------------------------------------------------------===// 305 306 /// The main grunt class. This represents an instantiation of an intrinsic with 307 /// a particular typespec and prototype. 308 class Intrinsic { 309 /// The Record this intrinsic was created from. 310 Record *R; 311 /// The unmangled name. 312 std::string Name; 313 /// The input and output typespecs. InTS == OutTS except when 314 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 315 TypeSpec OutTS, InTS; 316 /// The base class kind. Most intrinsics use ClassS, which has full type 317 /// info for integers (s32/u32). 
Some use ClassI, which doesn't care about 318 /// signedness (i32), while some (ClassB) have no type at all, only a width 319 /// (32). 320 ClassKind CK; 321 /// The list of DAGs for the body. May be empty, in which case we should 322 /// emit a builtin call. 323 ListInit *Body; 324 /// The architectural #ifdef guard. 325 std::string Guard; 326 /// Set if the Unavailable bit is 1. This means we don't generate a body, 327 /// just an "unavailable" attribute on a declaration. 328 bool IsUnavailable; 329 /// Is this intrinsic safe for big-endian? or does it need its arguments 330 /// reversing? 331 bool BigEndianSafe; 332 333 /// The types of return value [0] and parameters [1..]. 334 std::vector<Type> Types; 335 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 336 int PolymorphicKeyType; 337 /// The local variables defined. 338 std::map<std::string, Variable> Variables; 339 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 340 bool NeededEarly; 341 /// UseMacro - set if we should implement using a macro or unset for a 342 /// function. 343 bool UseMacro; 344 /// The set of intrinsics that this intrinsic uses/requires. 345 std::set<Intrinsic *> Dependencies; 346 /// The "base type", which is Type('d', OutTS). InBaseType is only 347 /// different if CartesianProductWith is non-empty (for vreinterpret). 348 Type BaseType, InBaseType; 349 /// The return variable. 350 Variable RetVar; 351 /// A postfix to apply to every variable. Defaults to "". 
352 std::string VariablePostfix; 353 354 NeonEmitter &Emitter; 355 std::stringstream OS; 356 357 bool isBigEndianSafe() const { 358 if (BigEndianSafe) 359 return true; 360 361 for (const auto &T : Types){ 362 if (T.isVector() && T.getNumElements() > 1) 363 return false; 364 } 365 return true; 366 } 367 368 public: 369 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 370 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 371 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 372 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 373 Guard(Guard.str()), IsUnavailable(IsUnavailable), 374 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 375 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 376 Emitter(Emitter) { 377 // Modify the TypeSpec per-argument to get a concrete Type, and create 378 // known variables for each. 379 // Types[0] is the return value. 380 unsigned Pos = 0; 381 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 382 StringRef Mods = getNextModifiers(Proto, Pos); 383 while (!Mods.empty()) { 384 Types.emplace_back(InTS, Mods); 385 if (Mods.contains('!')) 386 PolymorphicKeyType = Types.size() - 1; 387 388 Mods = getNextModifiers(Proto, Pos); 389 } 390 391 for (auto Type : Types) { 392 // If this builtin takes an immediate argument, we need to #define it rather 393 // than use a standard declaration, so that SemaChecking can range check 394 // the immediate passed by the user. 395 396 // Pointer arguments need to use macros to avoid hiding aligned attributes 397 // from the pointer type. 398 399 // It is not permitted to pass or return an __fp16 by value, so intrinsics 400 // taking a scalar float16_t must be implemented as macros. 401 if (Type.isImmediate() || Type.isPointer() || 402 (Type.isScalar() && Type.isHalf())) 403 UseMacro = true; 404 } 405 } 406 407 /// Get the Record that this intrinsic is based off. 
408 Record *getRecord() const { return R; } 409 /// Get the set of Intrinsics that this intrinsic calls. 410 /// this is the set of immediate dependencies, NOT the 411 /// transitive closure. 412 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 413 /// Get the architectural guard string (#ifdef). 414 std::string getGuard() const { return Guard; } 415 /// Get the non-mangled name. 416 std::string getName() const { return Name; } 417 418 /// Return true if the intrinsic takes an immediate operand. 419 bool hasImmediate() const { 420 return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); }); 421 } 422 423 /// Return the parameter index of the immediate operand. 424 unsigned getImmediateIdx() const { 425 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 426 if (Types[Idx].isImmediate()) 427 return Idx - 1; 428 llvm_unreachable("Intrinsic has no immediate"); 429 } 430 431 432 unsigned getNumParams() const { return Types.size() - 1; } 433 Type getReturnType() const { return Types[0]; } 434 Type getParamType(unsigned I) const { return Types[I + 1]; } 435 Type getBaseType() const { return BaseType; } 436 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 437 438 /// Return true if the prototype has a scalar argument. 439 bool protoHasScalar() const; 440 441 /// Return the index that parameter PIndex will sit at 442 /// in a generated function call. This is often just PIndex, 443 /// but may not be as things such as multiple-vector operands 444 /// and sret parameters need to be taken into accont. 445 unsigned getGeneratedParamIdx(unsigned PIndex) { 446 unsigned Idx = 0; 447 if (getReturnType().getNumVectors() > 1) 448 // Multiple vectors are passed as sret. 
449 ++Idx; 450 451 for (unsigned I = 0; I < PIndex; ++I) 452 Idx += std::max(1U, getParamType(I).getNumVectors()); 453 454 return Idx; 455 } 456 457 bool hasBody() const { return Body && !Body->getValues().empty(); } 458 459 void setNeededEarly() { NeededEarly = true; } 460 461 bool operator<(const Intrinsic &Other) const { 462 // Sort lexicographically on a two-tuple (Guard, Name) 463 if (Guard != Other.Guard) 464 return Guard < Other.Guard; 465 return Name < Other.Name; 466 } 467 468 ClassKind getClassKind(bool UseClassBIfScalar = false) { 469 if (UseClassBIfScalar && !protoHasScalar()) 470 return ClassB; 471 return CK; 472 } 473 474 /// Return the name, mangled with type information. 475 /// If ForceClassS is true, use ClassS (u32/s32) instead 476 /// of the intrinsic's own type class. 477 std::string getMangledName(bool ForceClassS = false) const; 478 /// Return the type code for a builtin function call. 479 std::string getInstTypeCode(Type T, ClassKind CK) const; 480 /// Return the type string for a BUILTIN() macro in Builtins.def. 481 std::string getBuiltinTypeStr(); 482 483 /// Generate the intrinsic, returning code. 484 std::string generate(); 485 /// Perform type checking and populate the dependency graph, but 486 /// don't generate code yet. 
487 void indexBody(); 488 489 private: 490 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 491 492 std::string mangleName(std::string Name, ClassKind CK) const; 493 494 void initVariables(); 495 std::string replaceParamsIn(std::string S); 496 497 void emitBodyAsBuiltinCall(); 498 499 void generateImpl(bool ReverseArguments, 500 StringRef NamePrefix, StringRef CallPrefix); 501 void emitReturn(); 502 void emitBody(StringRef CallPrefix); 503 void emitShadowedArgs(); 504 void emitArgumentReversal(); 505 void emitReturnReversal(); 506 void emitReverseVariable(Variable &Dest, Variable &Src); 507 void emitNewLine(); 508 void emitClosingBrace(); 509 void emitOpeningBrace(); 510 void emitPrototype(StringRef NamePrefix); 511 512 class DagEmitter { 513 Intrinsic &Intr; 514 StringRef CallPrefix; 515 516 public: 517 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 518 Intr(Intr), CallPrefix(CallPrefix) { 519 } 520 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 521 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 522 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 523 std::pair<Type, std::string> emitDagDup(DagInit *DI); 524 std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); 525 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 526 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 527 std::pair<Type, std::string> emitDagCall(DagInit *DI, 528 bool MatchMangledName); 529 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 530 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 531 std::pair<Type, std::string> emitDagOp(DagInit *DI); 532 std::pair<Type, std::string> emitDag(DagInit *DI); 533 }; 534 }; 535 536 //===----------------------------------------------------------------------===// 537 // NeonEmitter 538 //===----------------------------------------------------------------------===// 539 540 class NeonEmitter { 541 RecordKeeper &Records; 542 DenseMap<Record 
*, ClassKind> ClassMap; 543 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 544 unsigned UniqueNumber; 545 546 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 547 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 548 void genOverloadTypeCheckCode(raw_ostream &OS, 549 SmallVectorImpl<Intrinsic *> &Defs); 550 void genIntrinsicRangeCheckCode(raw_ostream &OS, 551 SmallVectorImpl<Intrinsic *> &Defs); 552 553 public: 554 /// Called by Intrinsic - this attempts to get an intrinsic that takes 555 /// the given types as arguments. 556 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 557 Optional<std::string> MangledName); 558 559 /// Called by Intrinsic - returns a globally-unique number. 560 unsigned getUniqueNumber() { return UniqueNumber++; } 561 562 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 563 Record *SI = R.getClass("SInst"); 564 Record *II = R.getClass("IInst"); 565 Record *WI = R.getClass("WInst"); 566 Record *SOpI = R.getClass("SOpInst"); 567 Record *IOpI = R.getClass("IOpInst"); 568 Record *WOpI = R.getClass("WOpInst"); 569 Record *LOpI = R.getClass("LOpInst"); 570 Record *NoTestOpI = R.getClass("NoTestOpInst"); 571 572 ClassMap[SI] = ClassS; 573 ClassMap[II] = ClassI; 574 ClassMap[WI] = ClassW; 575 ClassMap[SOpI] = ClassS; 576 ClassMap[IOpI] = ClassI; 577 ClassMap[WOpI] = ClassW; 578 ClassMap[LOpI] = ClassL; 579 ClassMap[NoTestOpI] = ClassNoTest; 580 } 581 582 // Emit arm_neon.h.inc 583 void run(raw_ostream &o); 584 585 // Emit arm_fp16.h.inc 586 void runFP16(raw_ostream &o); 587 588 // Emit arm_bf16.h.inc 589 void runBF16(raw_ostream &o); 590 591 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 592 // arm_bf16.h 593 void runHeader(raw_ostream &o); 594 }; 595 596 } // end anonymous namespace 597 598 //===----------------------------------------------------------------------===// 599 // Type implementation 600 
//===----------------------------------------------------------------------===// 601 602 std::string Type::str() const { 603 if (isVoid()) 604 return "void"; 605 std::string S; 606 607 if (isInteger() && !isSigned()) 608 S += "u"; 609 610 if (isPoly()) 611 S += "poly"; 612 else if (isFloating()) 613 S += "float"; 614 else if (isBFloat16()) 615 S += "bfloat"; 616 else 617 S += "int"; 618 619 S += utostr(ElementBitwidth); 620 if (isVector()) 621 S += "x" + utostr(getNumElements()); 622 if (NumVectors > 1) 623 S += "x" + utostr(NumVectors); 624 S += "_t"; 625 626 if (Constant) 627 S += " const"; 628 if (Pointer) 629 S += " *"; 630 631 return S; 632 } 633 634 std::string Type::builtin_str() const { 635 std::string S; 636 if (isVoid()) 637 return "v"; 638 639 if (isPointer()) { 640 // All pointers are void pointers. 641 S = "v"; 642 if (isConstPointer()) 643 S += "C"; 644 S += "*"; 645 return S; 646 } else if (isInteger()) 647 switch (ElementBitwidth) { 648 case 8: S += "c"; break; 649 case 16: S += "s"; break; 650 case 32: S += "i"; break; 651 case 64: S += "Wi"; break; 652 case 128: S += "LLLi"; break; 653 default: llvm_unreachable("Unhandled case!"); 654 } 655 else if (isBFloat16()) { 656 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 657 S += "y"; 658 } else 659 switch (ElementBitwidth) { 660 case 16: S += "h"; break; 661 case 32: S += "f"; break; 662 case 64: S += "d"; break; 663 default: llvm_unreachable("Unhandled case!"); 664 } 665 666 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 667 if (isChar() && !isPointer() && isSigned()) 668 // Make chars explicitly signed. 669 S = "S" + S; 670 else if (isInteger() && !isSigned()) 671 S = "U" + S; 672 673 // Constant indices are "int", but have the "constant expression" modifier. 
674 if (isImmediate()) { 675 assert(isInteger() && isSigned()); 676 S = "I" + S; 677 } 678 679 if (isScalar()) 680 return S; 681 682 std::string Ret; 683 for (unsigned I = 0; I < NumVectors; ++I) 684 Ret += "V" + utostr(getNumElements()) + S; 685 686 return Ret; 687 } 688 689 unsigned Type::getNeonEnum() const { 690 unsigned Addend; 691 switch (ElementBitwidth) { 692 case 8: Addend = 0; break; 693 case 16: Addend = 1; break; 694 case 32: Addend = 2; break; 695 case 64: Addend = 3; break; 696 case 128: Addend = 4; break; 697 default: llvm_unreachable("Unhandled element bitwidth!"); 698 } 699 700 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 701 if (isPoly()) { 702 // Adjustment needed because Poly32 doesn't exist. 703 if (Addend >= 2) 704 --Addend; 705 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 706 } 707 if (isFloating()) { 708 assert(Addend != 0 && "Float8 doesn't exist!"); 709 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 710 } 711 712 if (isBFloat16()) { 713 assert(Addend == 1 && "BFloat16 is only 16 bit"); 714 Base = (unsigned)NeonTypeFlags::BFloat16; 715 } 716 717 if (Bitwidth == 128) 718 Base |= (unsigned)NeonTypeFlags::QuadFlag; 719 if (isInteger() && !isSigned()) 720 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 721 722 return Base; 723 } 724 725 Type Type::fromTypedefName(StringRef Name) { 726 Type T; 727 T.Kind = SInt; 728 729 if (Name.front() == 'u') { 730 T.Kind = UInt; 731 Name = Name.drop_front(); 732 } 733 734 if (Name.startswith("float")) { 735 T.Kind = Float; 736 Name = Name.drop_front(5); 737 } else if (Name.startswith("poly")) { 738 T.Kind = Poly; 739 Name = Name.drop_front(4); 740 } else if (Name.startswith("bfloat")) { 741 T.Kind = BFloat16; 742 Name = Name.drop_front(6); 743 } else { 744 assert(Name.startswith("int")); 745 Name = Name.drop_front(3); 746 } 747 748 unsigned I = 0; 749 for (I = 0; I < Name.size(); ++I) { 750 if (!isdigit(Name[I])) 751 break; 752 } 753 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 
754 Name = Name.drop_front(I); 755 756 T.Bitwidth = T.ElementBitwidth; 757 T.NumVectors = 1; 758 759 if (Name.front() == 'x') { 760 Name = Name.drop_front(); 761 unsigned I = 0; 762 for (I = 0; I < Name.size(); ++I) { 763 if (!isdigit(Name[I])) 764 break; 765 } 766 unsigned NumLanes; 767 Name.substr(0, I).getAsInteger(10, NumLanes); 768 Name = Name.drop_front(I); 769 T.Bitwidth = T.ElementBitwidth * NumLanes; 770 } else { 771 // Was scalar. 772 T.NumVectors = 0; 773 } 774 if (Name.front() == 'x') { 775 Name = Name.drop_front(); 776 unsigned I = 0; 777 for (I = 0; I < Name.size(); ++I) { 778 if (!isdigit(Name[I])) 779 break; 780 } 781 Name.substr(0, I).getAsInteger(10, T.NumVectors); 782 Name = Name.drop_front(I); 783 } 784 785 assert(Name.startswith("_t") && "Malformed typedef!"); 786 return T; 787 } 788 789 void Type::applyTypespec(bool &Quad) { 790 std::string S = TS; 791 ScalarForMangling = false; 792 Kind = SInt; 793 ElementBitwidth = ~0U; 794 NumVectors = 1; 795 796 for (char I : S) { 797 switch (I) { 798 case 'S': 799 ScalarForMangling = true; 800 break; 801 case 'H': 802 NoManglingQ = true; 803 Quad = true; 804 break; 805 case 'Q': 806 Quad = true; 807 break; 808 case 'P': 809 Kind = Poly; 810 break; 811 case 'U': 812 Kind = UInt; 813 break; 814 case 'c': 815 ElementBitwidth = 8; 816 break; 817 case 'h': 818 Kind = Float; 819 LLVM_FALLTHROUGH; 820 case 's': 821 ElementBitwidth = 16; 822 break; 823 case 'f': 824 Kind = Float; 825 LLVM_FALLTHROUGH; 826 case 'i': 827 ElementBitwidth = 32; 828 break; 829 case 'd': 830 Kind = Float; 831 LLVM_FALLTHROUGH; 832 case 'l': 833 ElementBitwidth = 64; 834 break; 835 case 'k': 836 ElementBitwidth = 128; 837 // Poly doesn't have a 128x1 type. 838 if (isPoly()) 839 NumVectors = 0; 840 break; 841 case 'b': 842 Kind = BFloat16; 843 ElementBitwidth = 16; 844 break; 845 default: 846 llvm_unreachable("Unhandled type code!"); 847 } 848 } 849 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 850 851 Bitwidth = Quad ? 
128 : 64; 852 } 853 854 void Type::applyModifiers(StringRef Mods) { 855 bool AppliedQuad = false; 856 applyTypespec(AppliedQuad); 857 858 for (char Mod : Mods) { 859 switch (Mod) { 860 case '.': 861 break; 862 case 'v': 863 Kind = Void; 864 break; 865 case 'S': 866 Kind = SInt; 867 break; 868 case 'U': 869 Kind = UInt; 870 break; 871 case 'B': 872 Kind = BFloat16; 873 ElementBitwidth = 16; 874 break; 875 case 'F': 876 Kind = Float; 877 break; 878 case 'P': 879 Kind = Poly; 880 break; 881 case '>': 882 assert(ElementBitwidth < 128); 883 ElementBitwidth *= 2; 884 break; 885 case '<': 886 assert(ElementBitwidth > 8); 887 ElementBitwidth /= 2; 888 break; 889 case '1': 890 NumVectors = 0; 891 break; 892 case '2': 893 NumVectors = 2; 894 break; 895 case '3': 896 NumVectors = 3; 897 break; 898 case '4': 899 NumVectors = 4; 900 break; 901 case '*': 902 Pointer = true; 903 break; 904 case 'c': 905 Constant = true; 906 break; 907 case 'Q': 908 Bitwidth = 128; 909 break; 910 case 'q': 911 Bitwidth = 64; 912 break; 913 case 'I': 914 Kind = SInt; 915 ElementBitwidth = Bitwidth = 32; 916 NumVectors = 0; 917 Immediate = true; 918 break; 919 case 'p': 920 if (isPoly()) 921 Kind = UInt; 922 break; 923 case '!': 924 // Key type, handled elsewhere. 
925 break; 926 default: 927 llvm_unreachable("Unhandled character!"); 928 } 929 } 930 } 931 932 //===----------------------------------------------------------------------===// 933 // Intrinsic implementation 934 //===----------------------------------------------------------------------===// 935 936 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 937 if (Proto.size() == Pos) 938 return StringRef(); 939 else if (Proto[Pos] != '(') 940 return Proto.substr(Pos++, 1); 941 942 size_t Start = Pos + 1; 943 size_t End = Proto.find(')', Start); 944 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 945 Pos = End + 1; 946 return Proto.slice(Start, End); 947 } 948 949 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 950 char typeCode = '\0'; 951 bool printNumber = true; 952 953 if (CK == ClassB) 954 return ""; 955 956 if (T.isBFloat16()) 957 return "bf16"; 958 959 if (T.isPoly()) 960 typeCode = 'p'; 961 else if (T.isInteger()) 962 typeCode = T.isSigned() ? 's' : 'u'; 963 else 964 typeCode = 'f'; 965 966 if (CK == ClassI) { 967 switch (typeCode) { 968 default: 969 break; 970 case 's': 971 case 'u': 972 case 'p': 973 typeCode = 'i'; 974 break; 975 } 976 } 977 if (CK == ClassB) { 978 typeCode = '\0'; 979 } 980 981 std::string S; 982 if (typeCode != '\0') 983 S.push_back(typeCode); 984 if (printNumber) 985 S += utostr(T.getElementSizeInBits()); 986 987 return S; 988 } 989 990 std::string Intrinsic::getBuiltinTypeStr() { 991 ClassKind LocalCK = getClassKind(true); 992 std::string S; 993 994 Type RetT = getReturnType(); 995 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 996 !RetT.isFloating() && !RetT.isBFloat16()) 997 RetT.makeInteger(RetT.getElementSizeInBits(), false); 998 999 // Since the return value must be one type, return a vector type of the 1000 // appropriate width which we will bitcast. 
An exception is made for 1001 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1002 // fashion, storing them to a pointer arg. 1003 if (RetT.getNumVectors() > 1) { 1004 S += "vv*"; // void result with void* first argument 1005 } else { 1006 if (RetT.isPoly()) 1007 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1008 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1009 RetT.makeSigned(); 1010 1011 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1012 // Cast to vector of 8-bit elements. 1013 RetT.makeInteger(8, true); 1014 1015 S += RetT.builtin_str(); 1016 } 1017 1018 for (unsigned I = 0; I < getNumParams(); ++I) { 1019 Type T = getParamType(I); 1020 if (T.isPoly()) 1021 T.makeInteger(T.getElementSizeInBits(), false); 1022 1023 if (LocalCK == ClassB && !T.isScalar()) 1024 T.makeInteger(8, true); 1025 // Halves always get converted to 8-bit elements. 1026 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1027 T.makeInteger(8, true); 1028 1029 if (LocalCK == ClassI && T.isInteger()) 1030 T.makeSigned(); 1031 1032 if (hasImmediate() && getImmediateIdx() == I) 1033 T.makeImmediate(32); 1034 1035 S += T.builtin_str(); 1036 } 1037 1038 // Extra constant integer to hold type class enum for this function, e.g. s8 1039 if (LocalCK == ClassB) 1040 S += "i"; 1041 1042 return S; 1043 } 1044 1045 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1046 // Check if the prototype has a scalar operand with the type of the vector 1047 // elements. If not, bitcasting the args will take care of arg checking. 1048 // The actual signedness etc. will be taken care of with special enums. 1049 ClassKind LocalCK = CK; 1050 if (!protoHasScalar()) 1051 LocalCK = ClassB; 1052 1053 return mangleName(Name, ForceClassS ? 
ClassS : LocalCK);
}

/// Mangle \p Name for class kind \p LocalCK by attaching the type code of
/// BaseType (and of InBaseType when the two differ), a "_v" marker for
/// ClassB, and the 'q' / scalar-size letters inserted before the first '_'.
/// A fixed set of vcvt conversion names is returned unchanged.
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
  std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  std::string S = Name;

  // These conversion intrinsics already spell out both types in their name.
  if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
      Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
      Name == "vcvt_f32_bf16")
    return Name;

  if (!typeCode.empty()) {
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (Name.size() >= 3 && isdigit(Name.back()) &&
        Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
      S.insert(S.length() - 3, "_" + typeCode);
    else
      S += "_" + typeCode;
  }

  if (BaseType != InBaseType) {
    // A reinterpret - output the input base type at the end.
    S += "_" + getInstTypeCode(InBaseType, LocalCK);
  }

  if (LocalCK == ClassB)
    S += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
1084 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1085 size_t Pos = S.find('_'); 1086 S.insert(Pos, "q"); 1087 } 1088 1089 char Suffix = '\0'; 1090 if (BaseType.isScalarForMangling()) { 1091 switch (BaseType.getElementSizeInBits()) { 1092 case 8: Suffix = 'b'; break; 1093 case 16: Suffix = 'h'; break; 1094 case 32: Suffix = 's'; break; 1095 case 64: Suffix = 'd'; break; 1096 default: llvm_unreachable("Bad suffix!"); 1097 } 1098 } 1099 if (Suffix != '\0') { 1100 size_t Pos = S.find('_'); 1101 S.insert(Pos, &Suffix, 1); 1102 } 1103 1104 return S; 1105 } 1106 1107 std::string Intrinsic::replaceParamsIn(std::string S) { 1108 while (S.find('$') != std::string::npos) { 1109 size_t Pos = S.find('$'); 1110 size_t End = Pos + 1; 1111 while (isalpha(S[End])) 1112 ++End; 1113 1114 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1115 assert_with_loc(Variables.find(VarName) != Variables.end(), 1116 "Variable not defined!"); 1117 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1118 } 1119 1120 return S; 1121 } 1122 1123 void Intrinsic::initVariables() { 1124 Variables.clear(); 1125 1126 // Modify the TypeSpec per-argument to get a concrete Type, and create 1127 // known variables for each. 
1128 for (unsigned I = 1; I < Types.size(); ++I) { 1129 char NameC = '0' + (I - 1); 1130 std::string Name = "p"; 1131 Name.push_back(NameC); 1132 1133 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1134 } 1135 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1136 } 1137 1138 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1139 if (UseMacro) 1140 OS << "#define "; 1141 else 1142 OS << "__ai " << Types[0].str() << " "; 1143 1144 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1145 1146 for (unsigned I = 0; I < getNumParams(); ++I) { 1147 if (I != 0) 1148 OS << ", "; 1149 1150 char NameC = '0' + I; 1151 std::string Name = "p"; 1152 Name.push_back(NameC); 1153 assert(Variables.find(Name) != Variables.end()); 1154 Variable &V = Variables[Name]; 1155 1156 if (!UseMacro) 1157 OS << V.getType().str() << " "; 1158 OS << V.getName(); 1159 } 1160 1161 OS << ")"; 1162 } 1163 1164 void Intrinsic::emitOpeningBrace() { 1165 if (UseMacro) 1166 OS << " __extension__ ({"; 1167 else 1168 OS << " {"; 1169 emitNewLine(); 1170 } 1171 1172 void Intrinsic::emitClosingBrace() { 1173 if (UseMacro) 1174 OS << "})"; 1175 else 1176 OS << "}"; 1177 } 1178 1179 void Intrinsic::emitNewLine() { 1180 if (UseMacro) 1181 OS << " \\\n"; 1182 else 1183 OS << "\n"; 1184 } 1185 1186 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1187 if (Dest.getType().getNumVectors() > 1) { 1188 emitNewLine(); 1189 1190 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1191 OS << " " << Dest.getName() << ".val[" << K << "] = " 1192 << "__builtin_shufflevector(" 1193 << Src.getName() << ".val[" << K << "], " 1194 << Src.getName() << ".val[" << K << "]"; 1195 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1196 OS << ", " << J; 1197 OS << ");"; 1198 emitNewLine(); 1199 } 1200 } else { 1201 OS << " " << Dest.getName() 1202 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1203 for (int J = 
Dest.getType().getNumElements() - 1; J >= 0; --J) 1204 OS << ", " << J; 1205 OS << ");"; 1206 emitNewLine(); 1207 } 1208 } 1209 1210 void Intrinsic::emitArgumentReversal() { 1211 if (isBigEndianSafe()) 1212 return; 1213 1214 // Reverse all vector arguments. 1215 for (unsigned I = 0; I < getNumParams(); ++I) { 1216 std::string Name = "p" + utostr(I); 1217 std::string NewName = "rev" + utostr(I); 1218 1219 Variable &V = Variables[Name]; 1220 Variable NewV(V.getType(), NewName + VariablePostfix); 1221 1222 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1223 continue; 1224 1225 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1226 emitReverseVariable(NewV, V); 1227 V = NewV; 1228 } 1229 } 1230 1231 void Intrinsic::emitReturnReversal() { 1232 if (isBigEndianSafe()) 1233 return; 1234 if (!getReturnType().isVector() || getReturnType().isVoid() || 1235 getReturnType().getNumElements() == 1) 1236 return; 1237 emitReverseVariable(RetVar, RetVar); 1238 } 1239 1240 void Intrinsic::emitShadowedArgs() { 1241 // Macro arguments are not type-checked like inline function arguments, 1242 // so assign them to local temporaries to get the right type checking. 1243 if (!UseMacro) 1244 return; 1245 1246 for (unsigned I = 0; I < getNumParams(); ++I) { 1247 // Do not create a temporary for an immediate argument. 1248 // That would defeat the whole point of using a macro! 1249 if (getParamType(I).isImmediate()) 1250 continue; 1251 // Do not create a temporary for pointer arguments. The input 1252 // pointer may have an alignment hint. 
1253 if (getParamType(I).isPointer()) 1254 continue; 1255 1256 std::string Name = "p" + utostr(I); 1257 1258 assert(Variables.find(Name) != Variables.end()); 1259 Variable &V = Variables[Name]; 1260 1261 std::string NewName = "s" + utostr(I); 1262 Variable V2(V.getType(), NewName + VariablePostfix); 1263 1264 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1265 << V.getName() << ";"; 1266 emitNewLine(); 1267 1268 V = V2; 1269 } 1270 } 1271 1272 bool Intrinsic::protoHasScalar() const { 1273 return llvm::any_of( 1274 Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); }); 1275 } 1276 1277 void Intrinsic::emitBodyAsBuiltinCall() { 1278 std::string S; 1279 1280 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1281 // sret-like argument. 1282 bool SRet = getReturnType().getNumVectors() >= 2; 1283 1284 StringRef N = Name; 1285 ClassKind LocalCK = CK; 1286 if (!protoHasScalar()) 1287 LocalCK = ClassB; 1288 1289 if (!getReturnType().isVoid() && !SRet) 1290 S += "(" + RetVar.getType().str() + ") "; 1291 1292 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1293 1294 if (SRet) 1295 S += "&" + RetVar.getName() + ", "; 1296 1297 for (unsigned I = 0; I < getNumParams(); ++I) { 1298 Variable &V = Variables["p" + utostr(I)]; 1299 Type T = V.getType(); 1300 1301 // Handle multiple-vector values specially, emitting each subvector as an 1302 // argument to the builtin. 1303 if (T.getNumVectors() > 1) { 1304 // Check if an explicit cast is needed. 1305 std::string Cast; 1306 if (LocalCK == ClassB) { 1307 Type T2 = T; 1308 T2.makeOneVector(); 1309 T2.makeInteger(8, /*Sign=*/true); 1310 Cast = "(" + T2.str() + ")"; 1311 } 1312 1313 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1314 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1315 continue; 1316 } 1317 1318 std::string Arg = V.getName(); 1319 Type CastToType = T; 1320 1321 // Check if an explicit cast is needed. 
1322 if (CastToType.isVector() && 1323 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1324 CastToType.makeInteger(8, true); 1325 Arg = "(" + CastToType.str() + ")" + Arg; 1326 } else if (CastToType.isVector() && LocalCK == ClassI) { 1327 if (CastToType.isInteger()) 1328 CastToType.makeSigned(); 1329 Arg = "(" + CastToType.str() + ")" + Arg; 1330 } 1331 1332 S += Arg + ", "; 1333 } 1334 1335 // Extra constant integer to hold type class enum for this function, e.g. s8 1336 if (getClassKind(true) == ClassB) { 1337 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1338 } else { 1339 // Remove extraneous ", ". 1340 S.pop_back(); 1341 S.pop_back(); 1342 } 1343 S += ");"; 1344 1345 std::string RetExpr; 1346 if (!SRet && !RetVar.getType().isVoid()) 1347 RetExpr = RetVar.getName() + " = "; 1348 1349 OS << " " << RetExpr << S; 1350 emitNewLine(); 1351 } 1352 1353 void Intrinsic::emitBody(StringRef CallPrefix) { 1354 std::vector<std::string> Lines; 1355 1356 assert(RetVar.getType() == Types[0]); 1357 // Create a return variable, if we're not void. 1358 if (!RetVar.getType().isVoid()) { 1359 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1360 emitNewLine(); 1361 } 1362 1363 if (!Body || Body->getValues().empty()) { 1364 // Nothing specific to output - must output a builtin. 1365 emitBodyAsBuiltinCall(); 1366 return; 1367 } 1368 1369 // We have a list of "things to output". The last should be returned. 
1370 for (auto *I : Body->getValues()) { 1371 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1372 Lines.push_back(replaceParamsIn(SI->getAsString())); 1373 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1374 DagEmitter DE(*this, CallPrefix); 1375 Lines.push_back(DE.emitDag(DI).second + ";"); 1376 } 1377 } 1378 1379 assert(!Lines.empty() && "Empty def?"); 1380 if (!RetVar.getType().isVoid()) 1381 Lines.back().insert(0, RetVar.getName() + " = "); 1382 1383 for (auto &L : Lines) { 1384 OS << " " << L; 1385 emitNewLine(); 1386 } 1387 } 1388 1389 void Intrinsic::emitReturn() { 1390 if (RetVar.getType().isVoid()) 1391 return; 1392 if (UseMacro) 1393 OS << " " << RetVar.getName() << ";"; 1394 else 1395 OS << " return " << RetVar.getName() << ";"; 1396 emitNewLine(); 1397 } 1398 1399 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1400 // At this point we should only be seeing a def. 1401 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1402 std::string Op = DefI->getAsString(); 1403 1404 if (Op == "cast" || Op == "bitcast") 1405 return emitDagCast(DI, Op == "bitcast"); 1406 if (Op == "shuffle") 1407 return emitDagShuffle(DI); 1408 if (Op == "dup") 1409 return emitDagDup(DI); 1410 if (Op == "dup_typed") 1411 return emitDagDupTyped(DI); 1412 if (Op == "splat") 1413 return emitDagSplat(DI); 1414 if (Op == "save_temp") 1415 return emitDagSaveTemp(DI); 1416 if (Op == "op") 1417 return emitDagOp(DI); 1418 if (Op == "call" || Op == "call_mangled") 1419 return emitDagCall(DI, Op == "call_mangled"); 1420 if (Op == "name_replace") 1421 return emitDagNameReplace(DI); 1422 if (Op == "literal") 1423 return emitDagLiteral(DI); 1424 assert_with_loc(false, "Unknown operation!"); 1425 return std::make_pair(Type::getVoid(), ""); 1426 } 1427 1428 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1429 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1430 if (DI->getNumArgs() == 2) { 1431 // Unary op. 
1432 std::pair<Type, std::string> R = 1433 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1434 return std::make_pair(R.first, Op + R.second); 1435 } else { 1436 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1437 std::pair<Type, std::string> R1 = 1438 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1439 std::pair<Type, std::string> R2 = 1440 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1441 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1442 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1443 } 1444 } 1445 1446 std::pair<Type, std::string> 1447 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1448 std::vector<Type> Types; 1449 std::vector<std::string> Values; 1450 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1451 std::pair<Type, std::string> R = 1452 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1453 Types.push_back(R.first); 1454 Values.push_back(R.second); 1455 } 1456 1457 // Look up the called intrinsic. 1458 std::string N; 1459 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1460 N = SI->getAsUnquotedString(); 1461 else 1462 N = emitDagArg(DI->getArg(0), "").second; 1463 Optional<std::string> MangledName; 1464 if (MatchMangledName) { 1465 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1466 N += "q"; 1467 MangledName = Intr.mangleName(N, ClassS); 1468 } 1469 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1470 1471 // Make sure the callee is known as an early def. 1472 Callee.setNeededEarly(); 1473 Intr.Dependencies.insert(&Callee); 1474 1475 // Now create the call itself. 
std::string S;
  // The call prefix is only applied to callees that are not big-endian safe.
  if (!Callee.isBigEndianSafe())
    S += CallPrefix.str();
  S += Callee.getMangledName(true) + "(";
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    if (I != 0)
      S += ", ";
    S += Values[I];
  }
  S += ")";

  return std::make_pair(Callee.getReturnType(), S);
}

/// Emit a cast of the last argument of \p DI to the type described by the
/// preceding modifier arguments, which are applied in order. With
/// \p IsBitCast set a reinterpreting cast through a temporary is emitted;
/// otherwise a C-style value cast is emitted. Returns the target type and
/// the cast expression text.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
                                                                bool IsBitCast){
  // (cast MOD* VAL) -> cast VAL to type given by MOD.
  std::pair<Type, std::string> R =
      emitDagArg(DI->getArg(DI->getNumArgs() - 1),
                 std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
  // Start from the type of VAL; each modifier refines or replaces it.
  Type castToType = R.first;
  for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {

    // MOD can take several forms:
    //   1. $X    - take the type of parameter / variable X.
    //   2. The value "R" - take the type of the return type.
    //   3. a type string
    //   4. The value "U" or "S" to switch the signedness.
    //   5. The value "H" or "D" to half or double the bitwidth.
    //   6. The value "8" to convert to 8-bit (signed) integer lanes.
    //   7. The value "32" - applies make32BitElement() to the type.
1506 if (!DI->getArgNameStr(ArgIdx).empty()) { 1507 assert_with_loc(Intr.Variables.find(std::string( 1508 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1509 "Variable not found"); 1510 castToType = 1511 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1512 } else { 1513 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1514 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1515 1516 if (SI->getAsUnquotedString() == "R") { 1517 castToType = Intr.getReturnType(); 1518 } else if (SI->getAsUnquotedString() == "U") { 1519 castToType.makeUnsigned(); 1520 } else if (SI->getAsUnquotedString() == "S") { 1521 castToType.makeSigned(); 1522 } else if (SI->getAsUnquotedString() == "H") { 1523 castToType.halveLanes(); 1524 } else if (SI->getAsUnquotedString() == "D") { 1525 castToType.doubleLanes(); 1526 } else if (SI->getAsUnquotedString() == "8") { 1527 castToType.makeInteger(8, true); 1528 } else if (SI->getAsUnquotedString() == "32") { 1529 castToType.make32BitElement(); 1530 } else { 1531 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1532 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1533 } 1534 } 1535 } 1536 1537 std::string S; 1538 if (IsBitCast) { 1539 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1540 // a temporary. 1541 std::string N = "reint"; 1542 unsigned I = 0; 1543 while (Intr.Variables.find(N) != Intr.Variables.end()) 1544 N = "reint" + utostr(++I); 1545 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1546 1547 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1548 << R.second << ";"; 1549 Intr.emitNewLine(); 1550 1551 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1552 } else { 1553 // Emit a normal (static) cast. 
1554 S = "(" + castToType.str() + ")(" + R.second + ")"; 1555 } 1556 1557 return std::make_pair(castToType, S); 1558 } 1559 1560 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1561 // See the documentation in arm_neon.td for a description of these operators. 1562 class LowHalf : public SetTheory::Operator { 1563 public: 1564 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1565 ArrayRef<SMLoc> Loc) override { 1566 SetTheory::RecSet Elts2; 1567 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1568 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1569 } 1570 }; 1571 1572 class HighHalf : public SetTheory::Operator { 1573 public: 1574 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1575 ArrayRef<SMLoc> Loc) override { 1576 SetTheory::RecSet Elts2; 1577 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1578 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1579 } 1580 }; 1581 1582 class Rev : public SetTheory::Operator { 1583 unsigned ElementSize; 1584 1585 public: 1586 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1587 1588 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1589 ArrayRef<SMLoc> Loc) override { 1590 SetTheory::RecSet Elts2; 1591 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1592 1593 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1594 VectorSize /= ElementSize; 1595 1596 std::vector<Record *> Revved; 1597 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1598 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1599 Revved.push_back(Elts2[VI + LI]); 1600 } 1601 } 1602 1603 Elts.insert(Revved.begin(), Revved.end()); 1604 } 1605 }; 1606 1607 class MaskExpander : public SetTheory::Expander { 1608 unsigned N; 1609 1610 public: 1611 MaskExpander(unsigned N) : N(N) {} 1612 1613 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1614 unsigned Addend = 0; 
if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return; // Any other record is left unexpanded.
      // mask0 yields sv0 .. sv(N-1); mask1 yields svN .. sv(2N-1).
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the sequence argument into an ordered list of "sv<N>" records,
  // parameterising the helpers with the operand's element size and count.
  SetTheory ST;
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Elts, None);

  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    // The text after the "sv" prefix is emitted as the shuffle index.
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
1654 Type T(Arg1.first); 1655 if (Elts.size() > T.getNumElements()) { 1656 assert_with_loc( 1657 Elts.size() == T.getNumElements() * 2, 1658 "Can only double or half the number of elements in a shuffle!"); 1659 T.doubleLanes(); 1660 } else if (Elts.size() < T.getNumElements()) { 1661 assert_with_loc( 1662 Elts.size() == T.getNumElements() / 2, 1663 "Can only double or half the number of elements in a shuffle!"); 1664 T.halveLanes(); 1665 } 1666 1667 return std::make_pair(T, S); 1668 } 1669 1670 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1671 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1672 std::pair<Type, std::string> A = 1673 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1674 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1675 1676 Type T = Intr.getBaseType(); 1677 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1678 std::string S = "(" + T.str() + ") {"; 1679 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1680 if (I != 0) 1681 S += ", "; 1682 S += A.second; 1683 } 1684 S += "}"; 1685 1686 return std::make_pair(T, S); 1687 } 1688 1689 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1690 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1691 std::pair<Type, std::string> B = 1692 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1693 assert_with_loc(B.first.isScalar(), 1694 "dup_typed() requires a scalar as the second argument"); 1695 Type T; 1696 // If the type argument is a constant string, construct the type directly. 
1697 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1698 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1699 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1700 } else 1701 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1702 1703 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1704 std::string S = "(" + T.str() + ") {"; 1705 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1706 if (I != 0) 1707 S += ", "; 1708 S += B.second; 1709 } 1710 S += "}"; 1711 1712 return std::make_pair(T, S); 1713 } 1714 1715 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1716 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1717 std::pair<Type, std::string> A = 1718 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1719 std::pair<Type, std::string> B = 1720 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1721 1722 assert_with_loc(B.first.isScalar(), 1723 "splat() requires a scalar int as the second argument"); 1724 1725 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1726 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1727 S += ", " + B.second; 1728 } 1729 S += ")"; 1730 1731 return std::make_pair(Intr.getBaseType(), S); 1732 } 1733 1734 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1735 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1736 std::pair<Type, std::string> A = 1737 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1738 1739 assert_with_loc(!A.first.isVoid(), 1740 "Argument to save_temp() must have non-void type!"); 1741 1742 std::string N = std::string(DI->getArgNameStr(0)); 1743 assert_with_loc(!N.empty(), 1744 "save_temp() expects a name as the first argument"); 1745 1746 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1747 "Variable already defined!"); 1748 Intr.Variables[N] 
= Variable(A.first, N + Intr.VariablePostfix); 1749 1750 std::string S = 1751 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1752 1753 return std::make_pair(Type::getVoid(), S); 1754 } 1755 1756 std::pair<Type, std::string> 1757 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1758 std::string S = Intr.Name; 1759 1760 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1761 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1762 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1763 1764 size_t Idx = S.find(ToReplace); 1765 1766 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1767 S.replace(Idx, ToReplace.size(), ReplaceWith); 1768 1769 return std::make_pair(Type::getVoid(), S); 1770 } 1771 1772 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1773 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1774 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1775 return std::make_pair(Type::fromTypedefName(Ty), Value); 1776 } 1777 1778 std::pair<Type, std::string> 1779 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1780 if (!ArgName.empty()) { 1781 assert_with_loc(!Arg->isComplete(), 1782 "Arguments must either be DAGs or names, not both!"); 1783 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1784 "Variable not defined!"); 1785 Variable &V = Intr.Variables[ArgName]; 1786 return std::make_pair(V.getType(), V.getName()); 1787 } 1788 1789 assert(Arg && "Neither ArgName nor Arg?!"); 1790 DagInit *DI = dyn_cast<DagInit>(Arg); 1791 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1792 1793 return emitDag(DI); 1794 } 1795 1796 std::string Intrinsic::generate() { 1797 // Avoid duplicated code for big and little endian 1798 if (isBigEndianSafe()) { 1799 generateImpl(false, "", ""); 1800 
return OS.str();
  }
  // Little endian intrinsics are simple and don't require any argument
  // swapping.
  OS << "#ifdef __LITTLE_ENDIAN__\n";

  generateImpl(false, "", "");

  OS << "#else\n";

  // Big endian intrinsics are more complex. The user intended these
  // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  // but we load as-if (V)LD1. So we should swap all arguments and
  // swap the return value too.
  //
  // If we call sub-intrinsics, we should call a version that does
  // not re-swap the arguments!
  generateImpl(true, "", "__noswap_");

  // If we're needed early, create a non-swapping variant for
  // big-endian.
  if (NeededEarly) {
    generateImpl(false, "__noswap_", "__noswap_");
  }
  OS << "#endif\n\n";

  return OS.str();
}

/// Emit one definition of the intrinsic into OS.
/// \param ReverseArguments emit reversal of the arguments before the body and
///        of the return value after it.
/// \param NamePrefix text placed in front of the emitted function name.
/// \param CallPrefix prefix handed to emitBody() for calls it generates.
void Intrinsic::generateImpl(bool ReverseArguments,
                             StringRef NamePrefix, StringRef CallPrefix) {
  // Make diagnostics raised while generating point at this intrinsic's record.
  CurrentRecord = R;

  // If we call a macro, our local variables may be corrupted due to
  // lack of proper lexical scoping. So, add a globally unique postfix
  // to every variable.
  //
  // indexBody() should have set up the Dependencies set by now.
1838 for (auto *I : Dependencies) 1839 if (I->UseMacro) { 1840 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1841 break; 1842 } 1843 1844 initVariables(); 1845 1846 emitPrototype(NamePrefix); 1847 1848 if (IsUnavailable) { 1849 OS << " __attribute__((unavailable));"; 1850 } else { 1851 emitOpeningBrace(); 1852 emitShadowedArgs(); 1853 if (ReverseArguments) 1854 emitArgumentReversal(); 1855 emitBody(CallPrefix); 1856 if (ReverseArguments) 1857 emitReturnReversal(); 1858 emitReturn(); 1859 emitClosingBrace(); 1860 } 1861 OS << "\n"; 1862 1863 CurrentRecord = nullptr; 1864 } 1865 1866 void Intrinsic::indexBody() { 1867 CurrentRecord = R; 1868 1869 initVariables(); 1870 emitBody(""); 1871 OS.str(""); 1872 1873 CurrentRecord = nullptr; 1874 } 1875 1876 //===----------------------------------------------------------------------===// 1877 // NeonEmitter implementation 1878 //===----------------------------------------------------------------------===// 1879 1880 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1881 Optional<std::string> MangledName) { 1882 // First, look up the name in the intrinsic map. 1883 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1884 ("Intrinsic '" + Name + "' not found!").str()); 1885 auto &V = IntrinsicMap.find(Name.str())->second; 1886 std::vector<Intrinsic *> GoodVec; 1887 1888 // Create a string to print if we end up failing. 1889 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1890 for (unsigned I = 0; I < Types.size(); ++I) { 1891 if (I != 0) 1892 ErrMsg += ", "; 1893 ErrMsg += Types[I].str(); 1894 } 1895 ErrMsg += ")'\n"; 1896 ErrMsg += "Available overloads:\n"; 1897 1898 // Now, look through each intrinsic implementation and see if the types are 1899 // compatible. 
for (auto &I : V) {
    // Record every overload in the error text, whether or not it matches.
    ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
    ErrMsg += "(";
    for (unsigned A = 0; A < I.getNumParams(); ++A) {
      if (A != 0)
        ErrMsg += ", ";
      ErrMsg += I.getParamType(A).str();
    }
    ErrMsg += ")\n";

    // When a mangled name was requested, it must match exactly.
    if (MangledName && MangledName != I.getMangledName(true))
      continue;

    if (I.getNumParams() != Types.size())
      continue;

    // Compare the requested types against the parameter types pairwise;
    // ArgNum tracks the parameter index as all_of walks Types in order.
    unsigned ArgNum = 0;
    bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) {
      return Type == I.getParamType(ArgNum++);
    });

    if (MatchingArgumentTypes)
      GoodVec.push_back(&I);
  }

  assert_with_loc(!GoodVec.empty(),
                  "No compatible intrinsic found - " + ErrMsg);
  assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);

  return *GoodVec.front();
}

/// Create one Intrinsic per type specification of record \p R, store them in
/// IntrinsicMap under the record's Name and append pointers to them to
/// \p Out.
void NeonEmitter::createIntrinsic(Record *R,
                                  SmallVectorImpl<Intrinsic *> &Out) {
  std::string Name = std::string(R->getValueAsString("Name"));
  std::string Proto = std::string(R->getValueAsString("Prototype"));
  std::string Types = std::string(R->getValueAsString("Types"));
  Record *OperationRec = R->getValueAsDef("Operation");
  bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
  std::string Guard = std::string(R->getValueAsString("ArchGuard"));
  bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
  std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));

  // Set the global current record. This allows assert_with_loc to produce
  // decent location information even when highly nested.
1945 CurrentRecord = R; 1946 1947 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1948 1949 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1950 1951 ClassKind CK = ClassNone; 1952 if (R->getSuperClasses().size() >= 2) 1953 CK = ClassMap[R->getSuperClasses()[1].first]; 1954 1955 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1956 if (!CartesianProductWith.empty()) { 1957 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 1958 for (auto TS : TypeSpecs) { 1959 Type DefaultT(TS, "."); 1960 for (auto SrcTS : ProductTypeSpecs) { 1961 Type DefaultSrcT(SrcTS, "."); 1962 if (TS == SrcTS || 1963 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1964 continue; 1965 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1966 } 1967 } 1968 } else { 1969 for (auto TS : TypeSpecs) { 1970 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1971 } 1972 } 1973 1974 llvm::sort(NewTypeSpecs); 1975 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1976 NewTypeSpecs.end()); 1977 auto &Entry = IntrinsicMap[Name]; 1978 1979 for (auto &I : NewTypeSpecs) { 1980 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 1981 Guard, IsUnavailable, BigEndianSafe); 1982 Out.push_back(&Entry.back()); 1983 } 1984 1985 CurrentRecord = nullptr; 1986 } 1987 1988 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1989 /// declaration of builtins, checking for unique builtin declarations. 1990 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1991 SmallVectorImpl<Intrinsic *> &Defs) { 1992 OS << "#ifdef GET_NEON_BUILTINS\n"; 1993 1994 // We only want to emit a builtin once, and we want to emit them in 1995 // alphabetical order, so use a std::set. 
std::set<std::string> Builtins;

  for (auto *Def : Defs) {
    // Only intrinsics without a body are emitted as builtins.
    if (Def->hasBody())
      continue;

    // One line of the form BUILTIN(__builtin_neon_<name>, "<types>", "n").
    std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";

    S += Def->getBuiltinTypeStr();
    S += "\", \"n\")";

    Builtins.insert(S);
  }

  for (auto &S : Builtins)
    OS << S << "\n";
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                           SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  struct OverloadInfo {
    uint64_t Mask;    // One bit per permitted NeonEnum value.
    int PtrArgNum;    // Pointer argument index; negative suppresses output.
    bool HasConstPtr; // Whether the pointer argument is a const pointer.
    OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  };
  std::map<std::string, OverloadInfo> OverloadMap;

  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
      continue;
    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Def->protoHasScalar())
      continue;

    // Bit position is the NeonEnum value of the polymorphic key type.
    uint64_t Mask = 0ULL;
    Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();

    // Check if the function has a pointer or const pointer argument.
2046 int PtrArgNum = -1; 2047 bool HasConstPtr = false; 2048 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2049 const auto &Type = Def->getParamType(I); 2050 if (Type.isPointer()) { 2051 PtrArgNum = I; 2052 HasConstPtr = Type.isConstPointer(); 2053 } 2054 } 2055 2056 // For sret builtins, adjust the pointer argument index. 2057 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2058 PtrArgNum += 1; 2059 2060 std::string Name = Def->getName(); 2061 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2062 // and vst1_lane intrinsics. Using a pointer to the vector element 2063 // type with one of those operations causes codegen to select an aligned 2064 // load/store instruction. If you want an unaligned operation, 2065 // the pointer argument needs to have less alignment than element type, 2066 // so just accept any pointer type. 2067 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2068 PtrArgNum = -1; 2069 HasConstPtr = false; 2070 } 2071 2072 if (Mask) { 2073 std::string Name = Def->getMangledName(); 2074 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2075 OverloadInfo &OI = OverloadMap[Name]; 2076 OI.Mask |= Mask; 2077 OI.PtrArgNum |= PtrArgNum; 2078 OI.HasConstPtr = HasConstPtr; 2079 } 2080 } 2081 2082 for (auto &I : OverloadMap) { 2083 OverloadInfo &OI = I.second; 2084 2085 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2086 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2087 if (OI.PtrArgNum >= 0) 2088 OS << "; PtrArgNum = " << OI.PtrArgNum; 2089 if (OI.HasConstPtr) 2090 OS << "; HasConstPtr = true"; 2091 OS << "; break;\n"; 2092 } 2093 OS << "#endif\n\n"; 2094 } 2095 2096 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2097 SmallVectorImpl<Intrinsic *> &Defs) { 2098 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2099 2100 std::set<std::string> Emitted; 2101 2102 for (auto *Def : Defs) { 2103 if (Def->hasBody()) 2104 continue; 2105 // Functions which do not 
have an immediate do not need to have range 2106 // checking code emitted. 2107 if (!Def->hasImmediate()) 2108 continue; 2109 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2110 continue; 2111 2112 std::string LowerBound, UpperBound; 2113 2114 Record *R = Def->getRecord(); 2115 if (R->getValueAsBit("isVXAR")) { 2116 //VXAR takes an immediate in the range [0, 63] 2117 LowerBound = "0"; 2118 UpperBound = "63"; 2119 } else if (R->getValueAsBit("isVCVT_N")) { 2120 // VCVT between floating- and fixed-point values takes an immediate 2121 // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16. 2122 LowerBound = "1"; 2123 if (Def->getBaseType().getElementSizeInBits() == 16 || 2124 Def->getName().find('h') != std::string::npos) 2125 // VCVTh operating on FP16 intrinsics in range [1, 16) 2126 UpperBound = "15"; 2127 else if (Def->getBaseType().getElementSizeInBits() == 32) 2128 UpperBound = "31"; 2129 else 2130 UpperBound = "63"; 2131 } else if (R->getValueAsBit("isScalarShift")) { 2132 // Right shifts have an 'r' in the name, left shifts do not. Convert 2133 // instructions have the same bounds and right shifts. 2134 if (Def->getName().find('r') != std::string::npos || 2135 Def->getName().find("cvt") != std::string::npos) 2136 LowerBound = "1"; 2137 2138 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2139 } else if (R->getValueAsBit("isShift")) { 2140 // Builtins which are overloaded by type will need to have their upper 2141 // bound computed at Sema time based on the type constant. 2142 2143 // Right shifts have an 'r' in the name, left shifts do not. 2144 if (Def->getName().find('r') != std::string::npos) 2145 LowerBound = "1"; 2146 UpperBound = "RFT(TV, true)"; 2147 } else if (Def->getClassKind(true) == ClassB) { 2148 // ClassB intrinsics have a type (and hence lane number) that is only 2149 // known at runtime. 
2150 if (R->getValueAsBit("isLaneQ")) 2151 UpperBound = "RFT(TV, false, true)"; 2152 else 2153 UpperBound = "RFT(TV, false, false)"; 2154 } else { 2155 // The immediate generally refers to a lane in the preceding argument. 2156 assert(Def->getImmediateIdx() > 0); 2157 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2158 UpperBound = utostr(T.getNumElements() - 1); 2159 } 2160 2161 // Calculate the index of the immediate that should be range checked. 2162 unsigned Idx = Def->getNumParams(); 2163 if (Def->hasImmediate()) 2164 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2165 2166 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2167 << "i = " << Idx << ";"; 2168 if (!LowerBound.empty()) 2169 OS << " l = " << LowerBound << ";"; 2170 if (!UpperBound.empty()) 2171 OS << " u = " << UpperBound << ";"; 2172 OS << " break;\n"; 2173 2174 Emitted.insert(Def->getMangledName()); 2175 } 2176 2177 OS << "#endif\n\n"; 2178 } 2179 2180 /// runHeader - Emit a file with sections defining: 2181 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2182 /// 2. the SemaChecking code for the type overload checking. 2183 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2184 void NeonEmitter::runHeader(raw_ostream &OS) { 2185 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2186 2187 SmallVector<Intrinsic *, 128> Defs; 2188 for (auto *R : RV) 2189 createIntrinsic(R, Defs); 2190 2191 // Generate shared BuiltinsXXX.def 2192 genBuiltinsDef(OS, Defs); 2193 2194 // Generate ARM overloaded type checking code for SemaChecking.cpp 2195 genOverloadTypeCheckCode(OS, Defs); 2196 2197 // Generate ARM range checking code for shift/lane immediates. 
2198 genIntrinsicRangeCheckCode(OS, Defs); 2199 } 2200 2201 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2202 std::string TypedefTypes(types); 2203 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2204 2205 // Emit vector typedefs. 2206 bool InIfdef = false; 2207 for (auto &TS : TDTypeVec) { 2208 bool IsA64 = false; 2209 Type T(TS, "."); 2210 if (T.isDouble()) 2211 IsA64 = true; 2212 2213 if (InIfdef && !IsA64) { 2214 OS << "#endif\n"; 2215 InIfdef = false; 2216 } 2217 if (!InIfdef && IsA64) { 2218 OS << "#ifdef __aarch64__\n"; 2219 InIfdef = true; 2220 } 2221 2222 if (T.isPoly()) 2223 OS << "typedef __attribute__((neon_polyvector_type("; 2224 else 2225 OS << "typedef __attribute__((neon_vector_type("; 2226 2227 Type T2 = T; 2228 T2.makeScalar(); 2229 OS << T.getNumElements() << "))) "; 2230 OS << T2.str(); 2231 OS << " " << T.str() << ";\n"; 2232 } 2233 if (InIfdef) 2234 OS << "#endif\n"; 2235 OS << "\n"; 2236 2237 // Emit struct typedefs. 2238 InIfdef = false; 2239 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2240 for (auto &TS : TDTypeVec) { 2241 bool IsA64 = false; 2242 Type T(TS, "."); 2243 if (T.isDouble()) 2244 IsA64 = true; 2245 2246 if (InIfdef && !IsA64) { 2247 OS << "#endif\n"; 2248 InIfdef = false; 2249 } 2250 if (!InIfdef && IsA64) { 2251 OS << "#ifdef __aarch64__\n"; 2252 InIfdef = true; 2253 } 2254 2255 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2256 Type VT(TS, Mods); 2257 OS << "typedef struct " << VT.str() << " {\n"; 2258 OS << " " << T.str() << " val"; 2259 OS << "[" << NumMembers << "]"; 2260 OS << ";\n} "; 2261 OS << VT.str() << ";\n"; 2262 OS << "\n"; 2263 } 2264 } 2265 if (InIfdef) 2266 OS << "#endif\n"; 2267 } 2268 2269 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2270 /// is comprised of type definitions and function declarations. 
2271 void NeonEmitter::run(raw_ostream &OS) { 2272 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2273 "------------------------------" 2274 "---===\n" 2275 " *\n" 2276 " * Permission is hereby granted, free of charge, to any person " 2277 "obtaining " 2278 "a copy\n" 2279 " * of this software and associated documentation files (the " 2280 "\"Software\")," 2281 " to deal\n" 2282 " * in the Software without restriction, including without limitation " 2283 "the " 2284 "rights\n" 2285 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2286 "and/or sell\n" 2287 " * copies of the Software, and to permit persons to whom the Software " 2288 "is\n" 2289 " * furnished to do so, subject to the following conditions:\n" 2290 " *\n" 2291 " * The above copyright notice and this permission notice shall be " 2292 "included in\n" 2293 " * all copies or substantial portions of the Software.\n" 2294 " *\n" 2295 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2296 "EXPRESS OR\n" 2297 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2298 "MERCHANTABILITY,\n" 2299 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2300 "SHALL THE\n" 2301 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2302 "OTHER\n" 2303 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2304 "ARISING FROM,\n" 2305 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2306 "DEALINGS IN\n" 2307 " * THE SOFTWARE.\n" 2308 " *\n" 2309 " *===-----------------------------------------------------------------" 2310 "---" 2311 "---===\n" 2312 " */\n\n"; 2313 2314 OS << "#ifndef __ARM_NEON_H\n"; 2315 OS << "#define __ARM_NEON_H\n\n"; 2316 2317 OS << "#ifndef __ARM_FP\n"; 2318 OS << "#error \"NEON intrinsics not available with the soft-float ABI. 
" 2319 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2320 OS << "#else\n\n"; 2321 2322 OS << "#if !defined(__ARM_NEON)\n"; 2323 OS << "#error \"NEON support not enabled\"\n"; 2324 OS << "#else\n\n"; 2325 2326 OS << "#include <stdint.h>\n\n"; 2327 2328 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2329 OS << "#include <arm_bf16.h>\n"; 2330 OS << "typedef __bf16 bfloat16_t;\n"; 2331 OS << "#endif\n\n"; 2332 2333 // Emit NEON-specific scalar typedefs. 2334 OS << "typedef float float32_t;\n"; 2335 OS << "typedef __fp16 float16_t;\n"; 2336 2337 OS << "#ifdef __aarch64__\n"; 2338 OS << "typedef double float64_t;\n"; 2339 OS << "#endif\n\n"; 2340 2341 // For now, signedness of polynomial types depends on target 2342 OS << "#ifdef __aarch64__\n"; 2343 OS << "typedef uint8_t poly8_t;\n"; 2344 OS << "typedef uint16_t poly16_t;\n"; 2345 OS << "typedef uint64_t poly64_t;\n"; 2346 OS << "typedef __uint128_t poly128_t;\n"; 2347 OS << "#else\n"; 2348 OS << "typedef int8_t poly8_t;\n"; 2349 OS << "typedef int16_t poly16_t;\n"; 2350 OS << "typedef int64_t poly64_t;\n"; 2351 OS << "#endif\n"; 2352 2353 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); 2354 2355 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2356 emitNeonTypeDefs("bQb", OS); 2357 OS << "#endif\n\n"; 2358 2359 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2360 "__nodebug__))\n\n"; 2361 2362 SmallVector<Intrinsic *, 128> Defs; 2363 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2364 for (auto *R : RV) 2365 createIntrinsic(R, Defs); 2366 2367 for (auto *I : Defs) 2368 I->indexBody(); 2369 2370 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2371 2372 // Only emit a def when its requirements have been met. 2373 // FIXME: This loop could be made faster, but it's fast enough for now. 
2374 bool MadeProgress = true; 2375 std::string InGuard; 2376 while (!Defs.empty() && MadeProgress) { 2377 MadeProgress = false; 2378 2379 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2380 I != Defs.end(); /*No step*/) { 2381 bool DependenciesSatisfied = true; 2382 for (auto *II : (*I)->getDependencies()) { 2383 if (llvm::is_contained(Defs, II)) 2384 DependenciesSatisfied = false; 2385 } 2386 if (!DependenciesSatisfied) { 2387 // Try the next one. 2388 ++I; 2389 continue; 2390 } 2391 2392 // Emit #endif/#if pair if needed. 2393 if ((*I)->getGuard() != InGuard) { 2394 if (!InGuard.empty()) 2395 OS << "#endif\n"; 2396 InGuard = (*I)->getGuard(); 2397 if (!InGuard.empty()) 2398 OS << "#if " << InGuard << "\n"; 2399 } 2400 2401 // Actually generate the intrinsic code. 2402 OS << (*I)->generate(); 2403 2404 MadeProgress = true; 2405 I = Defs.erase(I); 2406 } 2407 } 2408 assert(Defs.empty() && "Some requirements were not satisfied!"); 2409 if (!InGuard.empty()) 2410 OS << "#endif\n"; 2411 2412 OS << "\n"; 2413 OS << "#undef __ai\n\n"; 2414 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2415 OS << "#endif /* ifndef __ARM_FP */\n"; 2416 OS << "#endif /* __ARM_NEON_H */\n"; 2417 } 2418 2419 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2420 /// is comprised of type definitions and function declarations. 
2421 void NeonEmitter::runFP16(raw_ostream &OS) { 2422 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2423 "------------------------------" 2424 "---===\n" 2425 " *\n" 2426 " * Permission is hereby granted, free of charge, to any person " 2427 "obtaining a copy\n" 2428 " * of this software and associated documentation files (the " 2429 "\"Software\"), to deal\n" 2430 " * in the Software without restriction, including without limitation " 2431 "the rights\n" 2432 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2433 "and/or sell\n" 2434 " * copies of the Software, and to permit persons to whom the Software " 2435 "is\n" 2436 " * furnished to do so, subject to the following conditions:\n" 2437 " *\n" 2438 " * The above copyright notice and this permission notice shall be " 2439 "included in\n" 2440 " * all copies or substantial portions of the Software.\n" 2441 " *\n" 2442 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2443 "EXPRESS OR\n" 2444 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2445 "MERCHANTABILITY,\n" 2446 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2447 "SHALL THE\n" 2448 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2449 "OTHER\n" 2450 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2451 "ARISING FROM,\n" 2452 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2453 "DEALINGS IN\n" 2454 " * THE SOFTWARE.\n" 2455 " *\n" 2456 " *===-----------------------------------------------------------------" 2457 "---" 2458 "---===\n" 2459 " */\n\n"; 2460 2461 OS << "#ifndef __ARM_FP16_H\n"; 2462 OS << "#define __ARM_FP16_H\n\n"; 2463 2464 OS << "#include <stdint.h>\n\n"; 2465 2466 OS << "typedef __fp16 float16_t;\n"; 2467 2468 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2469 "__nodebug__))\n\n"; 2470 2471 SmallVector<Intrinsic *, 128> Defs; 2472 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2473 for (auto *R : RV) 2474 createIntrinsic(R, Defs); 2475 2476 for (auto *I : Defs) 2477 I->indexBody(); 2478 2479 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2480 2481 // Only emit a def when its requirements have been met. 2482 // FIXME: This loop could be made faster, but it's fast enough for now. 2483 bool MadeProgress = true; 2484 std::string InGuard; 2485 while (!Defs.empty() && MadeProgress) { 2486 MadeProgress = false; 2487 2488 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2489 I != Defs.end(); /*No step*/) { 2490 bool DependenciesSatisfied = true; 2491 for (auto *II : (*I)->getDependencies()) { 2492 if (llvm::is_contained(Defs, II)) 2493 DependenciesSatisfied = false; 2494 } 2495 if (!DependenciesSatisfied) { 2496 // Try the next one. 2497 ++I; 2498 continue; 2499 } 2500 2501 // Emit #endif/#if pair if needed. 2502 if ((*I)->getGuard() != InGuard) { 2503 if (!InGuard.empty()) 2504 OS << "#endif\n"; 2505 InGuard = (*I)->getGuard(); 2506 if (!InGuard.empty()) 2507 OS << "#if " << InGuard << "\n"; 2508 } 2509 2510 // Actually generate the intrinsic code. 
2511 OS << (*I)->generate(); 2512 2513 MadeProgress = true; 2514 I = Defs.erase(I); 2515 } 2516 } 2517 assert(Defs.empty() && "Some requirements were not satisfied!"); 2518 if (!InGuard.empty()) 2519 OS << "#endif\n"; 2520 2521 OS << "\n"; 2522 OS << "#undef __ai\n\n"; 2523 OS << "#endif /* __ARM_FP16_H */\n"; 2524 } 2525 2526 void NeonEmitter::runBF16(raw_ostream &OS) { 2527 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2528 "-----------------------------------===\n" 2529 " *\n" 2530 " *\n" 2531 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2532 "Exceptions.\n" 2533 " * See https://llvm.org/LICENSE.txt for license information.\n" 2534 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2535 " *\n" 2536 " *===-----------------------------------------------------------------" 2537 "------===\n" 2538 " */\n\n"; 2539 2540 OS << "#ifndef __ARM_BF16_H\n"; 2541 OS << "#define __ARM_BF16_H\n\n"; 2542 2543 OS << "typedef __bf16 bfloat16_t;\n"; 2544 2545 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2546 "__nodebug__))\n\n"; 2547 2548 SmallVector<Intrinsic *, 128> Defs; 2549 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2550 for (auto *R : RV) 2551 createIntrinsic(R, Defs); 2552 2553 for (auto *I : Defs) 2554 I->indexBody(); 2555 2556 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2557 2558 // Only emit a def when its requirements have been met. 2559 // FIXME: This loop could be made faster, but it's fast enough for now. 
2560 bool MadeProgress = true; 2561 std::string InGuard; 2562 while (!Defs.empty() && MadeProgress) { 2563 MadeProgress = false; 2564 2565 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2566 I != Defs.end(); /*No step*/) { 2567 bool DependenciesSatisfied = true; 2568 for (auto *II : (*I)->getDependencies()) { 2569 if (llvm::is_contained(Defs, II)) 2570 DependenciesSatisfied = false; 2571 } 2572 if (!DependenciesSatisfied) { 2573 // Try the next one. 2574 ++I; 2575 continue; 2576 } 2577 2578 // Emit #endif/#if pair if needed. 2579 if ((*I)->getGuard() != InGuard) { 2580 if (!InGuard.empty()) 2581 OS << "#endif\n"; 2582 InGuard = (*I)->getGuard(); 2583 if (!InGuard.empty()) 2584 OS << "#if " << InGuard << "\n"; 2585 } 2586 2587 // Actually generate the intrinsic code. 2588 OS << (*I)->generate(); 2589 2590 MadeProgress = true; 2591 I = Defs.erase(I); 2592 } 2593 } 2594 assert(Defs.empty() && "Some requirements were not satisfied!"); 2595 if (!InGuard.empty()) 2596 OS << "#endif\n"; 2597 2598 OS << "\n"; 2599 OS << "#undef __ai\n\n"; 2600 2601 OS << "#endif\n"; 2602 } 2603 2604 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2605 NeonEmitter(Records).run(OS); 2606 } 2607 2608 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2609 NeonEmitter(Records).runFP16(OS); 2610 } 2611 2612 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { 2613 NeonEmitter(Records).runBF16(OS); 2614 } 2615 2616 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2617 NeonEmitter(Records).runHeader(OS); 2618 } 2619 2620 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2621 llvm_unreachable("Neon test generation no longer implemented!"); 2622 } 2623