1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting arm_neon.h, which includes 10 // a declaration and definition of each function specified by the ARM NEON 11 // compiler interface. See ARM document DUI0348B. 12 // 13 // Each NEON instruction is implemented in terms of 1 or more functions which 14 // are suffixed with the element type of the input vectors. Functions may be 15 // implemented in terms of generic vector operations such as +, *, -, etc. or 16 // by calling a __builtin_-prefixed function which will be handled by clang's 17 // CodeGen library. 18 // 19 // Additional validation code can be generated by this file when runHeader() is 20 // called, rather than the normal run() entry point. 21 // 22 // See also the documentation in include/clang/Basic/arm_neon.td. 
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include "llvm/TableGen/SetTheory.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <utility> 52 #include <vector> 53 54 using namespace llvm; 55 56 namespace { 57 58 // While globals are generally bad, this one allows us to perform assertions 59 // liberally and somehow still trace them back to the def they indirectly 60 // came from. 61 static Record *CurrentRecord = nullptr; 62 static void assert_with_loc(bool Assertion, const std::string &Str) { 63 if (!Assertion) { 64 if (CurrentRecord) 65 PrintFatalError(CurrentRecord->getLoc(), Str); 66 else 67 PrintFatalError(Str); 68 } 69 } 70 71 enum ClassKind { 72 ClassNone, 73 ClassI, // generic integer instruction, e.g., "i8" suffix 74 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 75 ClassW, // width-specific instruction, e.g., "8" suffix 76 ClassB, // bitcast arguments with enum argument to specify type 77 ClassL, // Logical instructions which are op instructions 78 // but we need to not emit any suffix for in our 79 // tests. 80 ClassNoTest // Instructions which we do not test since they are 81 // not TRUE instructions. 82 }; 83 84 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 85 /// builtins. 
These must be kept in sync with the flags in 86 /// include/clang/Basic/TargetBuiltins.h. 87 namespace NeonTypeFlags { 88 89 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 90 91 enum EltType { 92 Int8, 93 Int16, 94 Int32, 95 Int64, 96 Poly8, 97 Poly16, 98 Poly64, 99 Poly128, 100 Float16, 101 Float32, 102 Float64, 103 BFloat16 104 }; 105 106 } // end namespace NeonTypeFlags 107 108 class NeonEmitter; 109 110 //===----------------------------------------------------------------------===// 111 // TypeSpec 112 //===----------------------------------------------------------------------===// 113 114 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 115 /// for strong typing purposes. 116 /// 117 /// A TypeSpec can be used to create a type. 118 class TypeSpec : public std::string { 119 public: 120 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 121 std::vector<TypeSpec> Ret; 122 TypeSpec Acc; 123 for (char I : Str.str()) { 124 if (islower(I)) { 125 Acc.push_back(I); 126 Ret.push_back(TypeSpec(Acc)); 127 Acc.clear(); 128 } else { 129 Acc.push_back(I); 130 } 131 } 132 return Ret; 133 } 134 }; 135 136 //===----------------------------------------------------------------------===// 137 // Type 138 //===----------------------------------------------------------------------===// 139 140 /// A Type. Not much more to say here. 141 class Type { 142 private: 143 TypeSpec TS; 144 145 enum TypeKind { 146 Void, 147 Float, 148 SInt, 149 UInt, 150 Poly, 151 BFloat16, 152 }; 153 TypeKind Kind; 154 bool Immediate, Constant, Pointer; 155 // ScalarForMangling and NoManglingQ are really not suited to live here as 156 // they are not related to the type. But they live in the TypeSpec (not the 157 // prototype), so this is really the only place to store them. 
158 bool ScalarForMangling, NoManglingQ; 159 unsigned Bitwidth, ElementBitwidth, NumVectors; 160 161 public: 162 Type() 163 : Kind(Void), Immediate(false), Constant(false), 164 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 165 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 166 167 Type(TypeSpec TS, StringRef CharMods) 168 : TS(std::move(TS)), Kind(Void), Immediate(false), 169 Constant(false), Pointer(false), ScalarForMangling(false), 170 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 171 applyModifiers(CharMods); 172 } 173 174 /// Returns a type representing "void". 175 static Type getVoid() { return Type(); } 176 177 bool operator==(const Type &Other) const { return str() == Other.str(); } 178 bool operator!=(const Type &Other) const { return !operator==(Other); } 179 180 // 181 // Query functions 182 // 183 bool isScalarForMangling() const { return ScalarForMangling; } 184 bool noManglingQ() const { return NoManglingQ; } 185 186 bool isPointer() const { return Pointer; } 187 bool isValue() const { return !isVoid() && !isPointer(); } 188 bool isScalar() const { return isValue() && NumVectors == 0; } 189 bool isVector() const { return isValue() && NumVectors > 0; } 190 bool isConstPointer() const { return Constant; } 191 bool isFloating() const { return Kind == Float; } 192 bool isInteger() const { return Kind == SInt || Kind == UInt; } 193 bool isPoly() const { return Kind == Poly; } 194 bool isSigned() const { return Kind == SInt; } 195 bool isImmediate() const { return Immediate; } 196 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 197 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 198 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 199 bool isChar() const { return ElementBitwidth == 8; } 200 bool isShort() const { return isInteger() && ElementBitwidth == 16; } 201 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 202 bool isLong() 
const { return isInteger() && ElementBitwidth == 64; } 203 bool isVoid() const { return Kind == Void; } 204 bool isBFloat16() const { return Kind == BFloat16; } 205 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 206 unsigned getSizeInBits() const { return Bitwidth; } 207 unsigned getElementSizeInBits() const { return ElementBitwidth; } 208 unsigned getNumVectors() const { return NumVectors; } 209 210 // 211 // Mutator functions 212 // 213 void makeUnsigned() { 214 assert(!isVoid() && "not a potentially signed type"); 215 Kind = UInt; 216 } 217 void makeSigned() { 218 assert(!isVoid() && "not a potentially signed type"); 219 Kind = SInt; 220 } 221 222 void makeInteger(unsigned ElemWidth, bool Sign) { 223 assert(!isVoid() && "converting void to int probably not useful"); 224 Kind = Sign ? SInt : UInt; 225 Immediate = false; 226 ElementBitwidth = ElemWidth; 227 } 228 229 void makeImmediate(unsigned ElemWidth) { 230 Kind = SInt; 231 Immediate = true; 232 ElementBitwidth = ElemWidth; 233 } 234 235 void makeScalar() { 236 Bitwidth = ElementBitwidth; 237 NumVectors = 0; 238 } 239 240 void makeOneVector() { 241 assert(isVector()); 242 NumVectors = 1; 243 } 244 245 void make32BitElement() { 246 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 247 ElementBitwidth = 32; 248 } 249 250 void doubleLanes() { 251 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 252 Bitwidth = 128; 253 } 254 255 void halveLanes() { 256 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 257 Bitwidth = 64; 258 } 259 260 /// Return the C string representation of a type, which is the typename 261 /// defined in stdint.h or arm_neon.h. 262 std::string str() const; 263 264 /// Return the string representation of a type, which is an encoded 265 /// string for passing to the BUILTIN() macro in Builtins.def. 266 std::string builtin_str() const; 267 268 /// Return the value in NeonTypeFlags for this type. 
269 unsigned getNeonEnum() const; 270 271 /// Parse a type from a stdint.h or arm_neon.h typedef name, 272 /// for example uint32x2_t or int64_t. 273 static Type fromTypedefName(StringRef Name); 274 275 private: 276 /// Creates the type based on the typespec string in TS. 277 /// Sets "Quad" to true if the "Q" or "H" modifiers were 278 /// seen. This is needed by applyModifier as some modifiers 279 /// only take effect if the type size was changed by "Q" or "H". 280 void applyTypespec(bool &Quad); 281 /// Applies prototype modifiers to the type. 282 void applyModifiers(StringRef Mods); 283 }; 284 285 //===----------------------------------------------------------------------===// 286 // Variable 287 //===----------------------------------------------------------------------===// 288 289 /// A variable is a simple class that just has a type and a name. 290 class Variable { 291 Type T; 292 std::string N; 293 294 public: 295 Variable() : T(Type::getVoid()), N("") {} 296 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 297 298 Type getType() const { return T; } 299 std::string getName() const { return "__" + N; } 300 }; 301 302 //===----------------------------------------------------------------------===// 303 // Intrinsic 304 //===----------------------------------------------------------------------===// 305 306 /// The main grunt class. This represents an instantiation of an intrinsic with 307 /// a particular typespec and prototype. 308 class Intrinsic { 309 /// The Record this intrinsic was created from. 310 Record *R; 311 /// The unmangled name. 312 std::string Name; 313 /// The input and output typespecs. InTS == OutTS except when 314 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 315 TypeSpec OutTS, InTS; 316 /// The base class kind. Most intrinsics use ClassS, which has full type 317 /// info for integers (s32/u32). 
Some use ClassI, which doesn't care about 318 /// signedness (i32), while some (ClassB) have no type at all, only a width 319 /// (32). 320 ClassKind CK; 321 /// The list of DAGs for the body. May be empty, in which case we should 322 /// emit a builtin call. 323 ListInit *Body; 324 /// The architectural #ifdef guard. 325 std::string Guard; 326 /// Set if the Unavailable bit is 1. This means we don't generate a body, 327 /// just an "unavailable" attribute on a declaration. 328 bool IsUnavailable; 329 /// Is this intrinsic safe for big-endian? or does it need its arguments 330 /// reversing? 331 bool BigEndianSafe; 332 333 /// The types of return value [0] and parameters [1..]. 334 std::vector<Type> Types; 335 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 336 int PolymorphicKeyType; 337 /// The local variables defined. 338 std::map<std::string, Variable> Variables; 339 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 340 bool NeededEarly; 341 /// UseMacro - set if we should implement using a macro or unset for a 342 /// function. 343 bool UseMacro; 344 /// The set of intrinsics that this intrinsic uses/requires. 345 std::set<Intrinsic *> Dependencies; 346 /// The "base type", which is Type('d', OutTS). InBaseType is only 347 /// different if CartesianProductWith is non-empty (for vreinterpret). 348 Type BaseType, InBaseType; 349 /// The return variable. 350 Variable RetVar; 351 /// A postfix to apply to every variable. Defaults to "". 
352 std::string VariablePostfix; 353 354 NeonEmitter &Emitter; 355 std::stringstream OS; 356 357 bool isBigEndianSafe() const { 358 if (BigEndianSafe) 359 return true; 360 361 for (const auto &T : Types){ 362 if (T.isVector() && T.getNumElements() > 1) 363 return false; 364 } 365 return true; 366 } 367 368 public: 369 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 370 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 371 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 372 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 373 Guard(Guard.str()), IsUnavailable(IsUnavailable), 374 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 375 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 376 Emitter(Emitter) { 377 // Modify the TypeSpec per-argument to get a concrete Type, and create 378 // known variables for each. 379 // Types[0] is the return value. 380 unsigned Pos = 0; 381 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 382 StringRef Mods = getNextModifiers(Proto, Pos); 383 while (!Mods.empty()) { 384 Types.emplace_back(InTS, Mods); 385 if (Mods.find('!') != StringRef::npos) 386 PolymorphicKeyType = Types.size() - 1; 387 388 Mods = getNextModifiers(Proto, Pos); 389 } 390 391 for (auto Type : Types) { 392 // If this builtin takes an immediate argument, we need to #define it rather 393 // than use a standard declaration, so that SemaChecking can range check 394 // the immediate passed by the user. 395 396 // Pointer arguments need to use macros to avoid hiding aligned attributes 397 // from the pointer type. 398 399 // It is not permitted to pass or return an __fp16 by value, so intrinsics 400 // taking a scalar float16_t must be implemented as macros. 401 if (Type.isImmediate() || Type.isPointer() || 402 (Type.isScalar() && Type.isHalf())) 403 UseMacro = true; 404 } 405 } 406 407 /// Get the Record that this intrinsic is based off. 
408 Record *getRecord() const { return R; } 409 /// Get the set of Intrinsics that this intrinsic calls. 410 /// this is the set of immediate dependencies, NOT the 411 /// transitive closure. 412 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 413 /// Get the architectural guard string (#ifdef). 414 std::string getGuard() const { return Guard; } 415 /// Get the non-mangled name. 416 std::string getName() const { return Name; } 417 418 /// Return true if the intrinsic takes an immediate operand. 419 bool hasImmediate() const { 420 return std::any_of(Types.begin(), Types.end(), 421 [](const Type &T) { return T.isImmediate(); }); 422 } 423 424 /// Return the parameter index of the immediate operand. 425 unsigned getImmediateIdx() const { 426 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 427 if (Types[Idx].isImmediate()) 428 return Idx - 1; 429 llvm_unreachable("Intrinsic has no immediate"); 430 } 431 432 433 unsigned getNumParams() const { return Types.size() - 1; } 434 Type getReturnType() const { return Types[0]; } 435 Type getParamType(unsigned I) const { return Types[I + 1]; } 436 Type getBaseType() const { return BaseType; } 437 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 438 439 /// Return true if the prototype has a scalar argument. 440 bool protoHasScalar() const; 441 442 /// Return the index that parameter PIndex will sit at 443 /// in a generated function call. This is often just PIndex, 444 /// but may not be as things such as multiple-vector operands 445 /// and sret parameters need to be taken into accont. 446 unsigned getGeneratedParamIdx(unsigned PIndex) { 447 unsigned Idx = 0; 448 if (getReturnType().getNumVectors() > 1) 449 // Multiple vectors are passed as sret. 
450 ++Idx; 451 452 for (unsigned I = 0; I < PIndex; ++I) 453 Idx += std::max(1U, getParamType(I).getNumVectors()); 454 455 return Idx; 456 } 457 458 bool hasBody() const { return Body && !Body->getValues().empty(); } 459 460 void setNeededEarly() { NeededEarly = true; } 461 462 bool operator<(const Intrinsic &Other) const { 463 // Sort lexicographically on a two-tuple (Guard, Name) 464 if (Guard != Other.Guard) 465 return Guard < Other.Guard; 466 return Name < Other.Name; 467 } 468 469 ClassKind getClassKind(bool UseClassBIfScalar = false) { 470 if (UseClassBIfScalar && !protoHasScalar()) 471 return ClassB; 472 return CK; 473 } 474 475 /// Return the name, mangled with type information. 476 /// If ForceClassS is true, use ClassS (u32/s32) instead 477 /// of the intrinsic's own type class. 478 std::string getMangledName(bool ForceClassS = false) const; 479 /// Return the type code for a builtin function call. 480 std::string getInstTypeCode(Type T, ClassKind CK) const; 481 /// Return the type string for a BUILTIN() macro in Builtins.def. 482 std::string getBuiltinTypeStr(); 483 484 /// Generate the intrinsic, returning code. 485 std::string generate(); 486 /// Perform type checking and populate the dependency graph, but 487 /// don't generate code yet. 
488 void indexBody(); 489 490 private: 491 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 492 493 std::string mangleName(std::string Name, ClassKind CK) const; 494 495 void initVariables(); 496 std::string replaceParamsIn(std::string S); 497 498 void emitBodyAsBuiltinCall(); 499 500 void generateImpl(bool ReverseArguments, 501 StringRef NamePrefix, StringRef CallPrefix); 502 void emitReturn(); 503 void emitBody(StringRef CallPrefix); 504 void emitShadowedArgs(); 505 void emitArgumentReversal(); 506 void emitReturnReversal(); 507 void emitReverseVariable(Variable &Dest, Variable &Src); 508 void emitNewLine(); 509 void emitClosingBrace(); 510 void emitOpeningBrace(); 511 void emitPrototype(StringRef NamePrefix); 512 513 class DagEmitter { 514 Intrinsic &Intr; 515 StringRef CallPrefix; 516 517 public: 518 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 519 Intr(Intr), CallPrefix(CallPrefix) { 520 } 521 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 522 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 523 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 524 std::pair<Type, std::string> emitDagDup(DagInit *DI); 525 std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); 526 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 527 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 528 std::pair<Type, std::string> emitDagCall(DagInit *DI, 529 bool MatchMangledName); 530 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 531 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 532 std::pair<Type, std::string> emitDagOp(DagInit *DI); 533 std::pair<Type, std::string> emitDag(DagInit *DI); 534 }; 535 }; 536 537 //===----------------------------------------------------------------------===// 538 // NeonEmitter 539 //===----------------------------------------------------------------------===// 540 541 class NeonEmitter { 542 RecordKeeper &Records; 543 DenseMap<Record 
*, ClassKind> ClassMap; 544 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 545 unsigned UniqueNumber; 546 547 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 548 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 549 void genOverloadTypeCheckCode(raw_ostream &OS, 550 SmallVectorImpl<Intrinsic *> &Defs); 551 void genIntrinsicRangeCheckCode(raw_ostream &OS, 552 SmallVectorImpl<Intrinsic *> &Defs); 553 554 public: 555 /// Called by Intrinsic - this attempts to get an intrinsic that takes 556 /// the given types as arguments. 557 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 558 Optional<std::string> MangledName); 559 560 /// Called by Intrinsic - returns a globally-unique number. 561 unsigned getUniqueNumber() { return UniqueNumber++; } 562 563 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 564 Record *SI = R.getClass("SInst"); 565 Record *II = R.getClass("IInst"); 566 Record *WI = R.getClass("WInst"); 567 Record *SOpI = R.getClass("SOpInst"); 568 Record *IOpI = R.getClass("IOpInst"); 569 Record *WOpI = R.getClass("WOpInst"); 570 Record *LOpI = R.getClass("LOpInst"); 571 Record *NoTestOpI = R.getClass("NoTestOpInst"); 572 573 ClassMap[SI] = ClassS; 574 ClassMap[II] = ClassI; 575 ClassMap[WI] = ClassW; 576 ClassMap[SOpI] = ClassS; 577 ClassMap[IOpI] = ClassI; 578 ClassMap[WOpI] = ClassW; 579 ClassMap[LOpI] = ClassL; 580 ClassMap[NoTestOpI] = ClassNoTest; 581 } 582 583 // Emit arm_neon.h.inc 584 void run(raw_ostream &o); 585 586 // Emit arm_fp16.h.inc 587 void runFP16(raw_ostream &o); 588 589 // Emit arm_bf16.h.inc 590 void runBF16(raw_ostream &o); 591 592 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 593 // arm_bf16.h 594 void runHeader(raw_ostream &o); 595 }; 596 597 } // end anonymous namespace 598 599 //===----------------------------------------------------------------------===// 600 // Type implementation 601 
//===----------------------------------------------------------------------===// 602 603 std::string Type::str() const { 604 if (isVoid()) 605 return "void"; 606 std::string S; 607 608 if (isInteger() && !isSigned()) 609 S += "u"; 610 611 if (isPoly()) 612 S += "poly"; 613 else if (isFloating()) 614 S += "float"; 615 else if (isBFloat16()) 616 S += "bfloat"; 617 else 618 S += "int"; 619 620 S += utostr(ElementBitwidth); 621 if (isVector()) 622 S += "x" + utostr(getNumElements()); 623 if (NumVectors > 1) 624 S += "x" + utostr(NumVectors); 625 S += "_t"; 626 627 if (Constant) 628 S += " const"; 629 if (Pointer) 630 S += " *"; 631 632 return S; 633 } 634 635 std::string Type::builtin_str() const { 636 std::string S; 637 if (isVoid()) 638 return "v"; 639 640 if (isPointer()) { 641 // All pointers are void pointers. 642 S = "v"; 643 if (isConstPointer()) 644 S += "C"; 645 S += "*"; 646 return S; 647 } else if (isInteger()) 648 switch (ElementBitwidth) { 649 case 8: S += "c"; break; 650 case 16: S += "s"; break; 651 case 32: S += "i"; break; 652 case 64: S += "Wi"; break; 653 case 128: S += "LLLi"; break; 654 default: llvm_unreachable("Unhandled case!"); 655 } 656 else if (isBFloat16()) { 657 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 658 S += "y"; 659 } else 660 switch (ElementBitwidth) { 661 case 16: S += "h"; break; 662 case 32: S += "f"; break; 663 case 64: S += "d"; break; 664 default: llvm_unreachable("Unhandled case!"); 665 } 666 667 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 668 if (isChar() && !isPointer() && isSigned()) 669 // Make chars explicitly signed. 670 S = "S" + S; 671 else if (isInteger() && !isSigned()) 672 S = "U" + S; 673 674 // Constant indices are "int", but have the "constant expression" modifier. 
675 if (isImmediate()) { 676 assert(isInteger() && isSigned()); 677 S = "I" + S; 678 } 679 680 if (isScalar()) 681 return S; 682 683 std::string Ret; 684 for (unsigned I = 0; I < NumVectors; ++I) 685 Ret += "V" + utostr(getNumElements()) + S; 686 687 return Ret; 688 } 689 690 unsigned Type::getNeonEnum() const { 691 unsigned Addend; 692 switch (ElementBitwidth) { 693 case 8: Addend = 0; break; 694 case 16: Addend = 1; break; 695 case 32: Addend = 2; break; 696 case 64: Addend = 3; break; 697 case 128: Addend = 4; break; 698 default: llvm_unreachable("Unhandled element bitwidth!"); 699 } 700 701 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 702 if (isPoly()) { 703 // Adjustment needed because Poly32 doesn't exist. 704 if (Addend >= 2) 705 --Addend; 706 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 707 } 708 if (isFloating()) { 709 assert(Addend != 0 && "Float8 doesn't exist!"); 710 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 711 } 712 713 if (isBFloat16()) { 714 assert(Addend == 1 && "BFloat16 is only 16 bit"); 715 Base = (unsigned)NeonTypeFlags::BFloat16; 716 } 717 718 if (Bitwidth == 128) 719 Base |= (unsigned)NeonTypeFlags::QuadFlag; 720 if (isInteger() && !isSigned()) 721 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 722 723 return Base; 724 } 725 726 Type Type::fromTypedefName(StringRef Name) { 727 Type T; 728 T.Kind = SInt; 729 730 if (Name.front() == 'u') { 731 T.Kind = UInt; 732 Name = Name.drop_front(); 733 } 734 735 if (Name.startswith("float")) { 736 T.Kind = Float; 737 Name = Name.drop_front(5); 738 } else if (Name.startswith("poly")) { 739 T.Kind = Poly; 740 Name = Name.drop_front(4); 741 } else if (Name.startswith("bfloat")) { 742 T.Kind = BFloat16; 743 Name = Name.drop_front(6); 744 } else { 745 assert(Name.startswith("int")); 746 Name = Name.drop_front(3); 747 } 748 749 unsigned I = 0; 750 for (I = 0; I < Name.size(); ++I) { 751 if (!isdigit(Name[I])) 752 break; 753 } 754 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 
755 Name = Name.drop_front(I); 756 757 T.Bitwidth = T.ElementBitwidth; 758 T.NumVectors = 1; 759 760 if (Name.front() == 'x') { 761 Name = Name.drop_front(); 762 unsigned I = 0; 763 for (I = 0; I < Name.size(); ++I) { 764 if (!isdigit(Name[I])) 765 break; 766 } 767 unsigned NumLanes; 768 Name.substr(0, I).getAsInteger(10, NumLanes); 769 Name = Name.drop_front(I); 770 T.Bitwidth = T.ElementBitwidth * NumLanes; 771 } else { 772 // Was scalar. 773 T.NumVectors = 0; 774 } 775 if (Name.front() == 'x') { 776 Name = Name.drop_front(); 777 unsigned I = 0; 778 for (I = 0; I < Name.size(); ++I) { 779 if (!isdigit(Name[I])) 780 break; 781 } 782 Name.substr(0, I).getAsInteger(10, T.NumVectors); 783 Name = Name.drop_front(I); 784 } 785 786 assert(Name.startswith("_t") && "Malformed typedef!"); 787 return T; 788 } 789 790 void Type::applyTypespec(bool &Quad) { 791 std::string S = TS; 792 ScalarForMangling = false; 793 Kind = SInt; 794 ElementBitwidth = ~0U; 795 NumVectors = 1; 796 797 for (char I : S) { 798 switch (I) { 799 case 'S': 800 ScalarForMangling = true; 801 break; 802 case 'H': 803 NoManglingQ = true; 804 Quad = true; 805 break; 806 case 'Q': 807 Quad = true; 808 break; 809 case 'P': 810 Kind = Poly; 811 break; 812 case 'U': 813 Kind = UInt; 814 break; 815 case 'c': 816 ElementBitwidth = 8; 817 break; 818 case 'h': 819 Kind = Float; 820 LLVM_FALLTHROUGH; 821 case 's': 822 ElementBitwidth = 16; 823 break; 824 case 'f': 825 Kind = Float; 826 LLVM_FALLTHROUGH; 827 case 'i': 828 ElementBitwidth = 32; 829 break; 830 case 'd': 831 Kind = Float; 832 LLVM_FALLTHROUGH; 833 case 'l': 834 ElementBitwidth = 64; 835 break; 836 case 'k': 837 ElementBitwidth = 128; 838 // Poly doesn't have a 128x1 type. 839 if (isPoly()) 840 NumVectors = 0; 841 break; 842 case 'b': 843 Kind = BFloat16; 844 ElementBitwidth = 16; 845 break; 846 default: 847 llvm_unreachable("Unhandled type code!"); 848 } 849 } 850 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 851 852 Bitwidth = Quad ? 
128 : 64; 853 } 854 855 void Type::applyModifiers(StringRef Mods) { 856 bool AppliedQuad = false; 857 applyTypespec(AppliedQuad); 858 859 for (char Mod : Mods) { 860 switch (Mod) { 861 case '.': 862 break; 863 case 'v': 864 Kind = Void; 865 break; 866 case 'S': 867 Kind = SInt; 868 break; 869 case 'U': 870 Kind = UInt; 871 break; 872 case 'B': 873 Kind = BFloat16; 874 ElementBitwidth = 16; 875 break; 876 case 'F': 877 Kind = Float; 878 break; 879 case 'P': 880 Kind = Poly; 881 break; 882 case '>': 883 assert(ElementBitwidth < 128); 884 ElementBitwidth *= 2; 885 break; 886 case '<': 887 assert(ElementBitwidth > 8); 888 ElementBitwidth /= 2; 889 break; 890 case '1': 891 NumVectors = 0; 892 break; 893 case '2': 894 NumVectors = 2; 895 break; 896 case '3': 897 NumVectors = 3; 898 break; 899 case '4': 900 NumVectors = 4; 901 break; 902 case '*': 903 Pointer = true; 904 break; 905 case 'c': 906 Constant = true; 907 break; 908 case 'Q': 909 Bitwidth = 128; 910 break; 911 case 'q': 912 Bitwidth = 64; 913 break; 914 case 'I': 915 Kind = SInt; 916 ElementBitwidth = Bitwidth = 32; 917 NumVectors = 0; 918 Immediate = true; 919 break; 920 case 'p': 921 if (isPoly()) 922 Kind = UInt; 923 break; 924 case '!': 925 // Key type, handled elsewhere. 
926 break; 927 default: 928 llvm_unreachable("Unhandled character!"); 929 } 930 } 931 } 932 933 //===----------------------------------------------------------------------===// 934 // Intrinsic implementation 935 //===----------------------------------------------------------------------===// 936 937 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 938 if (Proto.size() == Pos) 939 return StringRef(); 940 else if (Proto[Pos] != '(') 941 return Proto.substr(Pos++, 1); 942 943 size_t Start = Pos + 1; 944 size_t End = Proto.find(')', Start); 945 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 946 Pos = End + 1; 947 return Proto.slice(Start, End); 948 } 949 950 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 951 char typeCode = '\0'; 952 bool printNumber = true; 953 954 if (CK == ClassB) 955 return ""; 956 957 if (T.isBFloat16()) 958 return "bf16"; 959 960 if (T.isPoly()) 961 typeCode = 'p'; 962 else if (T.isInteger()) 963 typeCode = T.isSigned() ? 's' : 'u'; 964 else 965 typeCode = 'f'; 966 967 if (CK == ClassI) { 968 switch (typeCode) { 969 default: 970 break; 971 case 's': 972 case 'u': 973 case 'p': 974 typeCode = 'i'; 975 break; 976 } 977 } 978 if (CK == ClassB) { 979 typeCode = '\0'; 980 } 981 982 std::string S; 983 if (typeCode != '\0') 984 S.push_back(typeCode); 985 if (printNumber) 986 S += utostr(T.getElementSizeInBits()); 987 988 return S; 989 } 990 991 std::string Intrinsic::getBuiltinTypeStr() { 992 ClassKind LocalCK = getClassKind(true); 993 std::string S; 994 995 Type RetT = getReturnType(); 996 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 997 !RetT.isFloating() && !RetT.isBFloat16()) 998 RetT.makeInteger(RetT.getElementSizeInBits(), false); 999 1000 // Since the return value must be one type, return a vector type of the 1001 // appropriate width which we will bitcast. 
An exception is made for 1002 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1003 // fashion, storing them to a pointer arg. 1004 if (RetT.getNumVectors() > 1) { 1005 S += "vv*"; // void result with void* first argument 1006 } else { 1007 if (RetT.isPoly()) 1008 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1009 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1010 RetT.makeSigned(); 1011 1012 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1013 // Cast to vector of 8-bit elements. 1014 RetT.makeInteger(8, true); 1015 1016 S += RetT.builtin_str(); 1017 } 1018 1019 for (unsigned I = 0; I < getNumParams(); ++I) { 1020 Type T = getParamType(I); 1021 if (T.isPoly()) 1022 T.makeInteger(T.getElementSizeInBits(), false); 1023 1024 if (LocalCK == ClassB && !T.isScalar()) 1025 T.makeInteger(8, true); 1026 // Halves always get converted to 8-bit elements. 1027 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1028 T.makeInteger(8, true); 1029 1030 if (LocalCK == ClassI && T.isInteger()) 1031 T.makeSigned(); 1032 1033 if (hasImmediate() && getImmediateIdx() == I) 1034 T.makeImmediate(32); 1035 1036 S += T.builtin_str(); 1037 } 1038 1039 // Extra constant integer to hold type class enum for this function, e.g. s8 1040 if (LocalCK == ClassB) 1041 S += "i"; 1042 1043 return S; 1044 } 1045 1046 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1047 // Check if the prototype has a scalar operand with the type of the vector 1048 // elements. If not, bitcasting the args will take care of arg checking. 1049 // The actual signedness etc. will be taken care of with special enums. 1050 ClassKind LocalCK = CK; 1051 if (!protoHasScalar()) 1052 LocalCK = ClassB; 1053 1054 return mangleName(Name, ForceClassS ? 
ClassS : LocalCK); 1055 } 1056 1057 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1058 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1059 std::string S = Name; 1060 1061 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1062 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1063 Name == "vcvt_f32_bf16") 1064 return Name; 1065 1066 if (!typeCode.empty()) { 1067 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1068 if (Name.size() >= 3 && isdigit(Name.back()) && 1069 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1070 S.insert(S.length() - 3, "_" + typeCode); 1071 else 1072 S += "_" + typeCode; 1073 } 1074 1075 if (BaseType != InBaseType) { 1076 // A reinterpret - out the input base type at the end. 1077 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1078 } 1079 1080 if (LocalCK == ClassB) 1081 S += "_v"; 1082 1083 // Insert a 'q' before the first '_' character so that it ends up before 1084 // _lane or _n on vector-scalar operations. 
1085 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1086 size_t Pos = S.find('_'); 1087 S.insert(Pos, "q"); 1088 } 1089 1090 char Suffix = '\0'; 1091 if (BaseType.isScalarForMangling()) { 1092 switch (BaseType.getElementSizeInBits()) { 1093 case 8: Suffix = 'b'; break; 1094 case 16: Suffix = 'h'; break; 1095 case 32: Suffix = 's'; break; 1096 case 64: Suffix = 'd'; break; 1097 default: llvm_unreachable("Bad suffix!"); 1098 } 1099 } 1100 if (Suffix != '\0') { 1101 size_t Pos = S.find('_'); 1102 S.insert(Pos, &Suffix, 1); 1103 } 1104 1105 return S; 1106 } 1107 1108 std::string Intrinsic::replaceParamsIn(std::string S) { 1109 while (S.find('$') != std::string::npos) { 1110 size_t Pos = S.find('$'); 1111 size_t End = Pos + 1; 1112 while (isalpha(S[End])) 1113 ++End; 1114 1115 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1116 assert_with_loc(Variables.find(VarName) != Variables.end(), 1117 "Variable not defined!"); 1118 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1119 } 1120 1121 return S; 1122 } 1123 1124 void Intrinsic::initVariables() { 1125 Variables.clear(); 1126 1127 // Modify the TypeSpec per-argument to get a concrete Type, and create 1128 // known variables for each. 
1129 for (unsigned I = 1; I < Types.size(); ++I) { 1130 char NameC = '0' + (I - 1); 1131 std::string Name = "p"; 1132 Name.push_back(NameC); 1133 1134 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1135 } 1136 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1137 } 1138 1139 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1140 if (UseMacro) 1141 OS << "#define "; 1142 else 1143 OS << "__ai " << Types[0].str() << " "; 1144 1145 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1146 1147 for (unsigned I = 0; I < getNumParams(); ++I) { 1148 if (I != 0) 1149 OS << ", "; 1150 1151 char NameC = '0' + I; 1152 std::string Name = "p"; 1153 Name.push_back(NameC); 1154 assert(Variables.find(Name) != Variables.end()); 1155 Variable &V = Variables[Name]; 1156 1157 if (!UseMacro) 1158 OS << V.getType().str() << " "; 1159 OS << V.getName(); 1160 } 1161 1162 OS << ")"; 1163 } 1164 1165 void Intrinsic::emitOpeningBrace() { 1166 if (UseMacro) 1167 OS << " __extension__ ({"; 1168 else 1169 OS << " {"; 1170 emitNewLine(); 1171 } 1172 1173 void Intrinsic::emitClosingBrace() { 1174 if (UseMacro) 1175 OS << "})"; 1176 else 1177 OS << "}"; 1178 } 1179 1180 void Intrinsic::emitNewLine() { 1181 if (UseMacro) 1182 OS << " \\\n"; 1183 else 1184 OS << "\n"; 1185 } 1186 1187 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1188 if (Dest.getType().getNumVectors() > 1) { 1189 emitNewLine(); 1190 1191 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1192 OS << " " << Dest.getName() << ".val[" << K << "] = " 1193 << "__builtin_shufflevector(" 1194 << Src.getName() << ".val[" << K << "], " 1195 << Src.getName() << ".val[" << K << "]"; 1196 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1197 OS << ", " << J; 1198 OS << ");"; 1199 emitNewLine(); 1200 } 1201 } else { 1202 OS << " " << Dest.getName() 1203 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1204 for (int J = 
Dest.getType().getNumElements() - 1; J >= 0; --J) 1205 OS << ", " << J; 1206 OS << ");"; 1207 emitNewLine(); 1208 } 1209 } 1210 1211 void Intrinsic::emitArgumentReversal() { 1212 if (isBigEndianSafe()) 1213 return; 1214 1215 // Reverse all vector arguments. 1216 for (unsigned I = 0; I < getNumParams(); ++I) { 1217 std::string Name = "p" + utostr(I); 1218 std::string NewName = "rev" + utostr(I); 1219 1220 Variable &V = Variables[Name]; 1221 Variable NewV(V.getType(), NewName + VariablePostfix); 1222 1223 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1224 continue; 1225 1226 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1227 emitReverseVariable(NewV, V); 1228 V = NewV; 1229 } 1230 } 1231 1232 void Intrinsic::emitReturnReversal() { 1233 if (isBigEndianSafe()) 1234 return; 1235 if (!getReturnType().isVector() || getReturnType().isVoid() || 1236 getReturnType().getNumElements() == 1) 1237 return; 1238 emitReverseVariable(RetVar, RetVar); 1239 } 1240 1241 void Intrinsic::emitShadowedArgs() { 1242 // Macro arguments are not type-checked like inline function arguments, 1243 // so assign them to local temporaries to get the right type checking. 1244 if (!UseMacro) 1245 return; 1246 1247 for (unsigned I = 0; I < getNumParams(); ++I) { 1248 // Do not create a temporary for an immediate argument. 1249 // That would defeat the whole point of using a macro! 1250 if (getParamType(I).isImmediate()) 1251 continue; 1252 // Do not create a temporary for pointer arguments. The input 1253 // pointer may have an alignment hint. 
1254 if (getParamType(I).isPointer()) 1255 continue; 1256 1257 std::string Name = "p" + utostr(I); 1258 1259 assert(Variables.find(Name) != Variables.end()); 1260 Variable &V = Variables[Name]; 1261 1262 std::string NewName = "s" + utostr(I); 1263 Variable V2(V.getType(), NewName + VariablePostfix); 1264 1265 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1266 << V.getName() << ";"; 1267 emitNewLine(); 1268 1269 V = V2; 1270 } 1271 } 1272 1273 bool Intrinsic::protoHasScalar() const { 1274 return std::any_of(Types.begin(), Types.end(), [](const Type &T) { 1275 return T.isScalar() && !T.isImmediate(); 1276 }); 1277 } 1278 1279 void Intrinsic::emitBodyAsBuiltinCall() { 1280 std::string S; 1281 1282 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1283 // sret-like argument. 1284 bool SRet = getReturnType().getNumVectors() >= 2; 1285 1286 StringRef N = Name; 1287 ClassKind LocalCK = CK; 1288 if (!protoHasScalar()) 1289 LocalCK = ClassB; 1290 1291 if (!getReturnType().isVoid() && !SRet) 1292 S += "(" + RetVar.getType().str() + ") "; 1293 1294 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1295 1296 if (SRet) 1297 S += "&" + RetVar.getName() + ", "; 1298 1299 for (unsigned I = 0; I < getNumParams(); ++I) { 1300 Variable &V = Variables["p" + utostr(I)]; 1301 Type T = V.getType(); 1302 1303 // Handle multiple-vector values specially, emitting each subvector as an 1304 // argument to the builtin. 1305 if (T.getNumVectors() > 1) { 1306 // Check if an explicit cast is needed. 1307 std::string Cast; 1308 if (LocalCK == ClassB) { 1309 Type T2 = T; 1310 T2.makeOneVector(); 1311 T2.makeInteger(8, /*Signed=*/true); 1312 Cast = "(" + T2.str() + ")"; 1313 } 1314 1315 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1316 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1317 continue; 1318 } 1319 1320 std::string Arg = V.getName(); 1321 Type CastToType = T; 1322 1323 // Check if an explicit cast is needed. 
1324 if (CastToType.isVector() && 1325 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1326 CastToType.makeInteger(8, true); 1327 Arg = "(" + CastToType.str() + ")" + Arg; 1328 } else if (CastToType.isVector() && LocalCK == ClassI) { 1329 if (CastToType.isInteger()) 1330 CastToType.makeSigned(); 1331 Arg = "(" + CastToType.str() + ")" + Arg; 1332 } 1333 1334 S += Arg + ", "; 1335 } 1336 1337 // Extra constant integer to hold type class enum for this function, e.g. s8 1338 if (getClassKind(true) == ClassB) { 1339 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1340 } else { 1341 // Remove extraneous ", ". 1342 S.pop_back(); 1343 S.pop_back(); 1344 } 1345 S += ");"; 1346 1347 std::string RetExpr; 1348 if (!SRet && !RetVar.getType().isVoid()) 1349 RetExpr = RetVar.getName() + " = "; 1350 1351 OS << " " << RetExpr << S; 1352 emitNewLine(); 1353 } 1354 1355 void Intrinsic::emitBody(StringRef CallPrefix) { 1356 std::vector<std::string> Lines; 1357 1358 assert(RetVar.getType() == Types[0]); 1359 // Create a return variable, if we're not void. 1360 if (!RetVar.getType().isVoid()) { 1361 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1362 emitNewLine(); 1363 } 1364 1365 if (!Body || Body->getValues().empty()) { 1366 // Nothing specific to output - must output a builtin. 1367 emitBodyAsBuiltinCall(); 1368 return; 1369 } 1370 1371 // We have a list of "things to output". The last should be returned. 
1372 for (auto *I : Body->getValues()) { 1373 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1374 Lines.push_back(replaceParamsIn(SI->getAsString())); 1375 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1376 DagEmitter DE(*this, CallPrefix); 1377 Lines.push_back(DE.emitDag(DI).second + ";"); 1378 } 1379 } 1380 1381 assert(!Lines.empty() && "Empty def?"); 1382 if (!RetVar.getType().isVoid()) 1383 Lines.back().insert(0, RetVar.getName() + " = "); 1384 1385 for (auto &L : Lines) { 1386 OS << " " << L; 1387 emitNewLine(); 1388 } 1389 } 1390 1391 void Intrinsic::emitReturn() { 1392 if (RetVar.getType().isVoid()) 1393 return; 1394 if (UseMacro) 1395 OS << " " << RetVar.getName() << ";"; 1396 else 1397 OS << " return " << RetVar.getName() << ";"; 1398 emitNewLine(); 1399 } 1400 1401 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1402 // At this point we should only be seeing a def. 1403 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1404 std::string Op = DefI->getAsString(); 1405 1406 if (Op == "cast" || Op == "bitcast") 1407 return emitDagCast(DI, Op == "bitcast"); 1408 if (Op == "shuffle") 1409 return emitDagShuffle(DI); 1410 if (Op == "dup") 1411 return emitDagDup(DI); 1412 if (Op == "dup_typed") 1413 return emitDagDupTyped(DI); 1414 if (Op == "splat") 1415 return emitDagSplat(DI); 1416 if (Op == "save_temp") 1417 return emitDagSaveTemp(DI); 1418 if (Op == "op") 1419 return emitDagOp(DI); 1420 if (Op == "call" || Op == "call_mangled") 1421 return emitDagCall(DI, Op == "call_mangled"); 1422 if (Op == "name_replace") 1423 return emitDagNameReplace(DI); 1424 if (Op == "literal") 1425 return emitDagLiteral(DI); 1426 assert_with_loc(false, "Unknown operation!"); 1427 return std::make_pair(Type::getVoid(), ""); 1428 } 1429 1430 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1431 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1432 if (DI->getNumArgs() == 2) { 1433 // Unary op. 
1434 std::pair<Type, std::string> R = 1435 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1436 return std::make_pair(R.first, Op + R.second); 1437 } else { 1438 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1439 std::pair<Type, std::string> R1 = 1440 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1441 std::pair<Type, std::string> R2 = 1442 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1443 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1444 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1445 } 1446 } 1447 1448 std::pair<Type, std::string> 1449 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1450 std::vector<Type> Types; 1451 std::vector<std::string> Values; 1452 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1453 std::pair<Type, std::string> R = 1454 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1455 Types.push_back(R.first); 1456 Values.push_back(R.second); 1457 } 1458 1459 // Look up the called intrinsic. 1460 std::string N; 1461 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1462 N = SI->getAsUnquotedString(); 1463 else 1464 N = emitDagArg(DI->getArg(0), "").second; 1465 Optional<std::string> MangledName; 1466 if (MatchMangledName) { 1467 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1468 N += "q"; 1469 MangledName = Intr.mangleName(N, ClassS); 1470 } 1471 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1472 1473 // Make sure the callee is known as an early def. 1474 Callee.setNeededEarly(); 1475 Intr.Dependencies.insert(&Callee); 1476 1477 // Now create the call itself. 
1478 std::string S = ""; 1479 if (!Callee.isBigEndianSafe()) 1480 S += CallPrefix.str(); 1481 S += Callee.getMangledName(true) + "("; 1482 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1483 if (I != 0) 1484 S += ", "; 1485 S += Values[I]; 1486 } 1487 S += ")"; 1488 1489 return std::make_pair(Callee.getReturnType(), S); 1490 } 1491 1492 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1493 bool IsBitCast){ 1494 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1495 std::pair<Type, std::string> R = 1496 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1497 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1498 Type castToType = R.first; 1499 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1500 1501 // MOD can take several forms: 1502 // 1. $X - take the type of parameter / variable X. 1503 // 2. The value "R" - take the type of the return type. 1504 // 3. a type string 1505 // 4. The value "U" or "S" to switch the signedness. 1506 // 5. The value "H" or "D" to half or double the bitwidth. 1507 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1508 if (!DI->getArgNameStr(ArgIdx).empty()) { 1509 assert_with_loc(Intr.Variables.find(std::string( 1510 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1511 "Variable not found"); 1512 castToType = 1513 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1514 } else { 1515 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1516 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1517 1518 if (SI->getAsUnquotedString() == "R") { 1519 castToType = Intr.getReturnType(); 1520 } else if (SI->getAsUnquotedString() == "U") { 1521 castToType.makeUnsigned(); 1522 } else if (SI->getAsUnquotedString() == "S") { 1523 castToType.makeSigned(); 1524 } else if (SI->getAsUnquotedString() == "H") { 1525 castToType.halveLanes(); 1526 } else if (SI->getAsUnquotedString() == "D") { 1527 castToType.doubleLanes(); 1528 } else if (SI->getAsUnquotedString() == "8") { 1529 castToType.makeInteger(8, true); 1530 } else if (SI->getAsUnquotedString() == "32") { 1531 castToType.make32BitElement(); 1532 } else { 1533 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1534 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1535 } 1536 } 1537 } 1538 1539 std::string S; 1540 if (IsBitCast) { 1541 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1542 // a temporary. 1543 std::string N = "reint"; 1544 unsigned I = 0; 1545 while (Intr.Variables.find(N) != Intr.Variables.end()) 1546 N = "reint" + utostr(++I); 1547 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1548 1549 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1550 << R.second << ";"; 1551 Intr.emitNewLine(); 1552 1553 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1554 } else { 1555 // Emit a normal (static) cast. 
1556 S = "(" + castToType.str() + ")(" + R.second + ")"; 1557 } 1558 1559 return std::make_pair(castToType, S); 1560 } 1561 1562 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1563 // See the documentation in arm_neon.td for a description of these operators. 1564 class LowHalf : public SetTheory::Operator { 1565 public: 1566 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1567 ArrayRef<SMLoc> Loc) override { 1568 SetTheory::RecSet Elts2; 1569 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1570 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1571 } 1572 }; 1573 1574 class HighHalf : public SetTheory::Operator { 1575 public: 1576 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1577 ArrayRef<SMLoc> Loc) override { 1578 SetTheory::RecSet Elts2; 1579 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1580 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1581 } 1582 }; 1583 1584 class Rev : public SetTheory::Operator { 1585 unsigned ElementSize; 1586 1587 public: 1588 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1589 1590 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1591 ArrayRef<SMLoc> Loc) override { 1592 SetTheory::RecSet Elts2; 1593 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1594 1595 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1596 VectorSize /= ElementSize; 1597 1598 std::vector<Record *> Revved; 1599 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1600 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1601 Revved.push_back(Elts2[VI + LI]); 1602 } 1603 } 1604 1605 Elts.insert(Revved.begin(), Revved.end()); 1606 } 1607 }; 1608 1609 class MaskExpander : public SetTheory::Expander { 1610 unsigned N; 1611 1612 public: 1613 MaskExpander(unsigned N) : N(N) {} 1614 1615 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1616 unsigned Addend = 0; 
1617 if (R->getName() == "mask0") 1618 Addend = 0; 1619 else if (R->getName() == "mask1") 1620 Addend = N; 1621 else 1622 return; 1623 for (unsigned I = 0; I < N; ++I) 1624 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1625 } 1626 }; 1627 1628 // (shuffle arg1, arg2, sequence) 1629 std::pair<Type, std::string> Arg1 = 1630 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1631 std::pair<Type, std::string> Arg2 = 1632 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1633 assert_with_loc(Arg1.first == Arg2.first, 1634 "Different types in arguments to shuffle!"); 1635 1636 SetTheory ST; 1637 SetTheory::RecSet Elts; 1638 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1639 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1640 ST.addOperator("rev", 1641 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1642 ST.addExpander("MaskExpand", 1643 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1644 ST.evaluate(DI->getArg(2), Elts, None); 1645 1646 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1647 for (auto &E : Elts) { 1648 StringRef Name = E->getName(); 1649 assert_with_loc(Name.startswith("sv"), 1650 "Incorrect element kind in shuffle mask!"); 1651 S += ", " + Name.drop_front(2).str(); 1652 } 1653 S += ")"; 1654 1655 // Recalculate the return type - the shuffle may have halved or doubled it. 
1656 Type T(Arg1.first); 1657 if (Elts.size() > T.getNumElements()) { 1658 assert_with_loc( 1659 Elts.size() == T.getNumElements() * 2, 1660 "Can only double or half the number of elements in a shuffle!"); 1661 T.doubleLanes(); 1662 } else if (Elts.size() < T.getNumElements()) { 1663 assert_with_loc( 1664 Elts.size() == T.getNumElements() / 2, 1665 "Can only double or half the number of elements in a shuffle!"); 1666 T.halveLanes(); 1667 } 1668 1669 return std::make_pair(T, S); 1670 } 1671 1672 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1673 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1674 std::pair<Type, std::string> A = 1675 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1676 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1677 1678 Type T = Intr.getBaseType(); 1679 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1680 std::string S = "(" + T.str() + ") {"; 1681 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1682 if (I != 0) 1683 S += ", "; 1684 S += A.second; 1685 } 1686 S += "}"; 1687 1688 return std::make_pair(T, S); 1689 } 1690 1691 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1692 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1693 std::pair<Type, std::string> B = 1694 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1695 assert_with_loc(B.first.isScalar(), 1696 "dup_typed() requires a scalar as the second argument"); 1697 Type T; 1698 // If the type argument is a constant string, construct the type directly. 
1699 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1700 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1701 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1702 } else 1703 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1704 1705 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1706 std::string S = "(" + T.str() + ") {"; 1707 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1708 if (I != 0) 1709 S += ", "; 1710 S += B.second; 1711 } 1712 S += "}"; 1713 1714 return std::make_pair(T, S); 1715 } 1716 1717 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1718 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1719 std::pair<Type, std::string> A = 1720 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1721 std::pair<Type, std::string> B = 1722 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1723 1724 assert_with_loc(B.first.isScalar(), 1725 "splat() requires a scalar int as the second argument"); 1726 1727 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1728 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1729 S += ", " + B.second; 1730 } 1731 S += ")"; 1732 1733 return std::make_pair(Intr.getBaseType(), S); 1734 } 1735 1736 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1737 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1738 std::pair<Type, std::string> A = 1739 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1740 1741 assert_with_loc(!A.first.isVoid(), 1742 "Argument to save_temp() must have non-void type!"); 1743 1744 std::string N = std::string(DI->getArgNameStr(0)); 1745 assert_with_loc(!N.empty(), 1746 "save_temp() expects a name as the first argument"); 1747 1748 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1749 "Variable already defined!"); 1750 Intr.Variables[N] 
= Variable(A.first, N + Intr.VariablePostfix); 1751 1752 std::string S = 1753 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1754 1755 return std::make_pair(Type::getVoid(), S); 1756 } 1757 1758 std::pair<Type, std::string> 1759 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1760 std::string S = Intr.Name; 1761 1762 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1763 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1764 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1765 1766 size_t Idx = S.find(ToReplace); 1767 1768 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1769 S.replace(Idx, ToReplace.size(), ReplaceWith); 1770 1771 return std::make_pair(Type::getVoid(), S); 1772 } 1773 1774 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1775 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1776 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1777 return std::make_pair(Type::fromTypedefName(Ty), Value); 1778 } 1779 1780 std::pair<Type, std::string> 1781 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1782 if (!ArgName.empty()) { 1783 assert_with_loc(!Arg->isComplete(), 1784 "Arguments must either be DAGs or names, not both!"); 1785 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1786 "Variable not defined!"); 1787 Variable &V = Intr.Variables[ArgName]; 1788 return std::make_pair(V.getType(), V.getName()); 1789 } 1790 1791 assert(Arg && "Neither ArgName nor Arg?!"); 1792 DagInit *DI = dyn_cast<DagInit>(Arg); 1793 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1794 1795 return emitDag(DI); 1796 } 1797 1798 std::string Intrinsic::generate() { 1799 // Avoid duplicated code for big and little endian 1800 if (isBigEndianSafe()) { 1801 generateImpl(false, "", ""); 1802 
return OS.str(); 1803 } 1804 // Little endian intrinsics are simple and don't require any argument 1805 // swapping. 1806 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1807 1808 generateImpl(false, "", ""); 1809 1810 OS << "#else\n"; 1811 1812 // Big endian intrinsics are more complex. The user intended these 1813 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1814 // but we load as-if (V)LD1. So we should swap all arguments and 1815 // swap the return value too. 1816 // 1817 // If we call sub-intrinsics, we should call a version that does 1818 // not re-swap the arguments! 1819 generateImpl(true, "", "__noswap_"); 1820 1821 // If we're needed early, create a non-swapping variant for 1822 // big-endian. 1823 if (NeededEarly) { 1824 generateImpl(false, "__noswap_", "__noswap_"); 1825 } 1826 OS << "#endif\n\n"; 1827 1828 return OS.str(); 1829 } 1830 1831 void Intrinsic::generateImpl(bool ReverseArguments, 1832 StringRef NamePrefix, StringRef CallPrefix) { 1833 CurrentRecord = R; 1834 1835 // If we call a macro, our local variables may be corrupted due to 1836 // lack of proper lexical scoping. So, add a globally unique postfix 1837 // to every variable. 1838 // 1839 // indexBody() should have set up the Dependencies set by now. 
1840 for (auto *I : Dependencies) 1841 if (I->UseMacro) { 1842 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1843 break; 1844 } 1845 1846 initVariables(); 1847 1848 emitPrototype(NamePrefix); 1849 1850 if (IsUnavailable) { 1851 OS << " __attribute__((unavailable));"; 1852 } else { 1853 emitOpeningBrace(); 1854 emitShadowedArgs(); 1855 if (ReverseArguments) 1856 emitArgumentReversal(); 1857 emitBody(CallPrefix); 1858 if (ReverseArguments) 1859 emitReturnReversal(); 1860 emitReturn(); 1861 emitClosingBrace(); 1862 } 1863 OS << "\n"; 1864 1865 CurrentRecord = nullptr; 1866 } 1867 1868 void Intrinsic::indexBody() { 1869 CurrentRecord = R; 1870 1871 initVariables(); 1872 emitBody(""); 1873 OS.str(""); 1874 1875 CurrentRecord = nullptr; 1876 } 1877 1878 //===----------------------------------------------------------------------===// 1879 // NeonEmitter implementation 1880 //===----------------------------------------------------------------------===// 1881 1882 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1883 Optional<std::string> MangledName) { 1884 // First, look up the name in the intrinsic map. 1885 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1886 ("Intrinsic '" + Name + "' not found!").str()); 1887 auto &V = IntrinsicMap.find(Name.str())->second; 1888 std::vector<Intrinsic *> GoodVec; 1889 1890 // Create a string to print if we end up failing. 1891 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1892 for (unsigned I = 0; I < Types.size(); ++I) { 1893 if (I != 0) 1894 ErrMsg += ", "; 1895 ErrMsg += Types[I].str(); 1896 } 1897 ErrMsg += ")'\n"; 1898 ErrMsg += "Available overloads:\n"; 1899 1900 // Now, look through each intrinsic implementation and see if the types are 1901 // compatible. 
1902 for (auto &I : V) { 1903 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1904 ErrMsg += "("; 1905 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1906 if (A != 0) 1907 ErrMsg += ", "; 1908 ErrMsg += I.getParamType(A).str(); 1909 } 1910 ErrMsg += ")\n"; 1911 1912 if (MangledName && MangledName != I.getMangledName(true)) 1913 continue; 1914 1915 if (I.getNumParams() != Types.size()) 1916 continue; 1917 1918 unsigned ArgNum = 0; 1919 bool MatchingArgumentTypes = 1920 std::all_of(Types.begin(), Types.end(), [&](const auto &Type) { 1921 return Type == I.getParamType(ArgNum++); 1922 }); 1923 1924 if (MatchingArgumentTypes) 1925 GoodVec.push_back(&I); 1926 } 1927 1928 assert_with_loc(!GoodVec.empty(), 1929 "No compatible intrinsic found - " + ErrMsg); 1930 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1931 1932 return *GoodVec.front(); 1933 } 1934 1935 void NeonEmitter::createIntrinsic(Record *R, 1936 SmallVectorImpl<Intrinsic *> &Out) { 1937 std::string Name = std::string(R->getValueAsString("Name")); 1938 std::string Proto = std::string(R->getValueAsString("Prototype")); 1939 std::string Types = std::string(R->getValueAsString("Types")); 1940 Record *OperationRec = R->getValueAsDef("Operation"); 1941 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1942 std::string Guard = std::string(R->getValueAsString("ArchGuard")); 1943 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1944 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 1945 1946 // Set the global current record. This allows assert_with_loc to produce 1947 // decent location information even when highly nested. 
// Publish the record being processed so that assert_with_loc() can attach its
// source location to any fatal error raised before it is reset below.
  CurrentRecord = R;

  ListInit *Body = OperationRec->getValueAsListInit("Ops");

  std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);

  // The class kind is looked up from the record's second superclass, when it
  // has one; otherwise it stays ClassNone.
  ClassKind CK = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    CK = ClassMap[R->getSuperClasses()[1].first];

  // Build the list of (type, source type) pairs to instantiate.
  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  if (!CartesianProductWith.empty()) {
    // Pair every type with every product type, keeping only pairs whose two
    // members differ but have the same size in bits.
    std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
    for (auto TS : TypeSpecs) {
      Type DefaultT(TS, ".");
      for (auto SrcTS : ProductTypeSpecs) {
        Type DefaultSrcT(SrcTS, ".");
        if (TS == SrcTS ||
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
          continue;
        NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
      }
    }
  } else {
    // No product requested: each type is paired with itself.
    for (auto TS : TypeSpecs) {
      NewTypeSpecs.push_back(std::make_pair(TS, TS));
    }
  }

  // Sort and drop duplicate pairs so each combination is created only once.
  llvm::sort(NewTypeSpecs);
  NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
                     NewTypeSpecs.end());
  auto &Entry = IntrinsicMap[Name];

  // Create one entry per pair and hand a pointer to it back through Out.
  for (auto &I : NewTypeSpecs) {
    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
                       Guard, IsUnavailable, BigEndianSafe);
    Out.push_back(&Entry.back());
  }

  CurrentRecord = nullptr;
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
///
/// The output is wrapped in `#ifdef GET_NEON_BUILTINS` / `#endif` and holds
/// one `BUILTIN(__builtin_neon_<mangled name>, "<type string>", "n")` line
/// for every def in \p Defs that has no body.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_BUILTINS\n";

  // We only want to emit a builtin once, and we want to emit them in
  // alphabetical order, so use a std::set.
  std::set<std::string> Builtins;

  for (auto *Def : Defs) {
    // Defs with a body are skipped; only the others get a BUILTIN line.
    if (Def->hasBody())
      continue;

    std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";

    S += Def->getBuiltinTypeStr();
    S += "\", \"n\")";

    Builtins.insert(S);
  }

  for (auto &S : Builtins)
    OS << S << "\n";
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// The output is wrapped in `#ifdef GET_NEON_OVERLOAD_CHECK` / `#endif` and
/// holds one line per mangled builtin name of the form
/// `case NEON::BI__builtin_neon_<name>: mask = 0x<hex>ULL[; PtrArgNum = <n>]
/// [; HasConstPtr = true]; break;`.
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                           SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  struct OverloadInfo {
    uint64_t Mask;
    int PtrArgNum;
    bool HasConstPtr;
    OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  };
  std::map<std::string, OverloadInfo> OverloadMap;

  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
      continue;
    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Def->protoHasScalar())
      continue;

    // One bit per permitted type, indexed by the NEON enum value of the def's
    // polymorphic key type.
    uint64_t Mask = 0ULL;
    Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();

    // Check if the function has a pointer or const pointer argument.
    // The loop does not stop at the first match, so the last pointer
    // parameter is the one that gets recorded.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
      const auto &Type = Def->getParamType(I);
      if (Type.isPointer()) {
        PtrArgNum = I;
        HasConstPtr = Type.isConstPointer();
      }
    }

    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
      PtrArgNum += 1;

    std::string Name = Def->getName();
    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics. Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction. If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (Mask) {
      // This inner Name shadows the one above: the map is keyed by the
      // mangled name, not by the base name.
      std::string Name = Def->getMangledName();
      OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
      OverloadInfo &OI = OverloadMap[Name];
      OI.Mask |= Mask;
      // NOTE(review): a fresh OverloadInfo starts with PtrArgNum == 0 and the
      // value is OR-ed in, so the first def yields its own PtrArgNum, while
      // several defs sharing a mangled name combine their indices bitwise.
      // HasConstPtr below is simply overwritten by the latest def. Confirm
      // this is the intended merge rule before changing it.
      OI.PtrArgNum |= PtrArgNum;
      OI.HasConstPtr = HasConstPtr;
    }
  }

  for (auto &I : OverloadMap) {
    OverloadInfo &OI = I.second;

    OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
    // PtrArgNum and HasConstPtr are only written when they carry information.
    if (OI.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << OI.PtrArgNum;
    if (OI.HasConstPtr)
      OS << "; HasConstPtr = true";
    OS << "; break;\n";
  }
  OS << "#endif\n\n";
}

/// Generate the range checking code for the immediate arguments of the
/// builtins.
///
/// The output is wrapped in `#ifdef GET_NEON_IMMEDIATE_CHECK` / `#endif` and
/// holds, for each body-less def with an immediate (once per mangled name), a
/// line of the form
/// `case NEON::BI__builtin_neon_<name>: i = <idx>;[ l = <lower>;][ u = <upper>;] break;`.
void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                             SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  // Mangled names that already have a case line.
  std::set<std::string> Emitted;

  for (auto *Def : Defs) {
    if (Def->hasBody())
      continue;
    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    if (!Def->hasImmediate())
      continue;
    if (Emitted.find(Def->getMangledName()) != Emitted.end())
      continue;

    // An empty bound string means that bound is not written to the output.
    std::string LowerBound, UpperBound;

    Record *R = Def->getRecord();
    if (R->getValueAsBit("isVXAR")) {
      // VXAR takes an immediate in the range [0, 63].
      LowerBound = "0";
      UpperBound = "63";
    } else if (R->getValueAsBit("isVCVT_N")) {
      // VCVT between floating- and fixed-point values takes an immediate
      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
      LowerBound = "1";
      if (Def->getBaseType().getElementSizeInBits() == 16 ||
          Def->getName().find('h') != std::string::npos)
        // VCVTh operating on FP16 intrinsics in range [1, 16)
        UpperBound = "15";
      else if (Def->getBaseType().getElementSizeInBits() == 32)
        UpperBound = "31";
      else
        UpperBound = "63";
    } else if (R->getValueAsBit("isScalarShift")) {
      // Right shifts have an 'r' in the name, left shifts do not. Convert
      // instructions have the same bounds as right shifts.
      if (Def->getName().find('r') != std::string::npos ||
          Def->getName().find("cvt") != std::string::npos)
        LowerBound = "1";

      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
    } else if (R->getValueAsBit("isShift")) {
      // Builtins which are overloaded by type will need to have their upper
      // bound computed at Sema time based on the type constant.

      // Right shifts have an 'r' in the name, left shifts do not.
      if (Def->getName().find('r') != std::string::npos)
        LowerBound = "1";
      UpperBound = "RFT(TV, true)";
    } else if (Def->getClassKind(true) == ClassB) {
      // ClassB intrinsics have a type (and hence lane number) that is only
      // known at runtime.
      if (R->getValueAsBit("isLaneQ"))
        UpperBound = "RFT(TV, false, true)";
      else
        UpperBound = "RFT(TV, false, false)";
    } else {
      // The immediate generally refers to a lane in the preceding argument.
      assert(Def->getImmediateIdx() > 0);
      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
      UpperBound = utostr(T.getNumElements() - 1);
    }

    // Calculate the index of the immediate that should be range checked.
    // hasImmediate() is always true here, because defs without an immediate
    // were skipped at the top of the loop.
    unsigned Idx = Def->getNumParams();
    if (Def->hasImmediate())
      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());

    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
       << "i = " << Idx << ";";
    if (!LowerBound.empty())
      OS << " l = " << LowerBound << ";";
    if (!UpperBound.empty())
      OS << " u = " << UpperBound << ";";
    OS << " break;\n";

    Emitted.insert(Def->getMangledName());
  }

  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Create the intrinsics for every record derived from "Inst".
  SmallVector<Intrinsic *, 128> Defs;
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  // Generate shared BuiltinsXXX.def
  genBuiltinsDef(OS, Defs);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, Defs);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, Defs);
}

/// Emit the vector typedefs and the 2-, 3- and 4-member struct typedefs for
/// every type spec listed in \p types.
///
/// Types for which isDouble() holds are wrapped in `#ifdef __aarch64__` /
/// `#endif`; consecutive such types share a single guard.
static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
  std::string TypedefTypes(types);
  std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);

  // Emit vector typedefs.
  // InIfdef tracks whether an `#ifdef __aarch64__` is currently open.
  bool InIfdef = false;
  for (auto &TS : TDTypeVec) {
    bool IsA64 = false;
    Type T(TS, ".");
    if (T.isDouble())
      IsA64 = true;

    if (InIfdef && !IsA64) {
      OS << "#endif\n";
      InIfdef = false;
    }
    if (!InIfdef && IsA64) {
      OS << "#ifdef __aarch64__\n";
      InIfdef = true;
    }

    if (T.isPoly())
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    // T2 is the scalar form of T; it is printed after the element count,
    // followed by the vector type's own name.
    Type T2 = T;
    T2.makeScalar();
    OS << T.getNumElements() << "))) ";
    OS << T2.str();
    OS << " " << T.str() << ";\n";
  }
  if (InIfdef)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs.
  InIfdef = false;
  for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
    for (auto &TS : TDTypeVec) {
      bool IsA64 = false;
      Type T(TS, ".");
      if (T.isDouble())
        IsA64 = true;

      if (InIfdef && !IsA64) {
        OS << "#endif\n";
        InIfdef = false;
      }
      if (!InIfdef && IsA64) {
        OS << "#ifdef __aarch64__\n";
        InIfdef = true;
      }

      // One-character modifier string: "2", "3" or "4", matching NumMembers.
      const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
      Type VT(TS, Mods);
      OS << "typedef struct " << VT.str() << " {\n";
      OS << " " << T.str() << " val";
      OS << "[" << NumMembers << "]";
      OS << ";\n} ";
      OS << VT.str() << ";\n";
      OS << "\n";
    }
  }
  if (InIfdef)
    OS << "#endif\n";
}

/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
2274 void NeonEmitter::run(raw_ostream &OS) { 2275 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2276 "------------------------------" 2277 "---===\n" 2278 " *\n" 2279 " * Permission is hereby granted, free of charge, to any person " 2280 "obtaining " 2281 "a copy\n" 2282 " * of this software and associated documentation files (the " 2283 "\"Software\")," 2284 " to deal\n" 2285 " * in the Software without restriction, including without limitation " 2286 "the " 2287 "rights\n" 2288 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2289 "and/or sell\n" 2290 " * copies of the Software, and to permit persons to whom the Software " 2291 "is\n" 2292 " * furnished to do so, subject to the following conditions:\n" 2293 " *\n" 2294 " * The above copyright notice and this permission notice shall be " 2295 "included in\n" 2296 " * all copies or substantial portions of the Software.\n" 2297 " *\n" 2298 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2299 "EXPRESS OR\n" 2300 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2301 "MERCHANTABILITY,\n" 2302 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2303 "SHALL THE\n" 2304 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2305 "OTHER\n" 2306 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2307 "ARISING FROM,\n" 2308 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2309 "DEALINGS IN\n" 2310 " * THE SOFTWARE.\n" 2311 " *\n" 2312 " *===-----------------------------------------------------------------" 2313 "---" 2314 "---===\n" 2315 " */\n\n"; 2316 2317 OS << "#ifndef __ARM_NEON_H\n"; 2318 OS << "#define __ARM_NEON_H\n\n"; 2319 2320 OS << "#ifndef __ARM_FP\n"; 2321 OS << "#error \"NEON intrinsics not available with the soft-float ABI. 
" 2322 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2323 OS << "#else\n\n"; 2324 2325 OS << "#if !defined(__ARM_NEON)\n"; 2326 OS << "#error \"NEON support not enabled\"\n"; 2327 OS << "#else\n\n"; 2328 2329 OS << "#include <stdint.h>\n\n"; 2330 2331 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2332 OS << "#include <arm_bf16.h>\n"; 2333 OS << "typedef __bf16 bfloat16_t;\n"; 2334 OS << "#endif\n\n"; 2335 2336 // Emit NEON-specific scalar typedefs. 2337 OS << "typedef float float32_t;\n"; 2338 OS << "typedef __fp16 float16_t;\n"; 2339 2340 OS << "#ifdef __aarch64__\n"; 2341 OS << "typedef double float64_t;\n"; 2342 OS << "#endif\n\n"; 2343 2344 // For now, signedness of polynomial types depends on target 2345 OS << "#ifdef __aarch64__\n"; 2346 OS << "typedef uint8_t poly8_t;\n"; 2347 OS << "typedef uint16_t poly16_t;\n"; 2348 OS << "typedef uint64_t poly64_t;\n"; 2349 OS << "typedef __uint128_t poly128_t;\n"; 2350 OS << "#else\n"; 2351 OS << "typedef int8_t poly8_t;\n"; 2352 OS << "typedef int16_t poly16_t;\n"; 2353 OS << "typedef int64_t poly64_t;\n"; 2354 OS << "#endif\n"; 2355 2356 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); 2357 2358 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2359 emitNeonTypeDefs("bQb", OS); 2360 OS << "#endif\n\n"; 2361 2362 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2363 "__nodebug__))\n\n"; 2364 2365 SmallVector<Intrinsic *, 128> Defs; 2366 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2367 for (auto *R : RV) 2368 createIntrinsic(R, Defs); 2369 2370 for (auto *I : Defs) 2371 I->indexBody(); 2372 2373 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2374 2375 // Only emit a def when its requirements have been met. 2376 // FIXME: This loop could be made faster, but it's fast enough for now. 
2377 bool MadeProgress = true; 2378 std::string InGuard; 2379 while (!Defs.empty() && MadeProgress) { 2380 MadeProgress = false; 2381 2382 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2383 I != Defs.end(); /*No step*/) { 2384 bool DependenciesSatisfied = true; 2385 for (auto *II : (*I)->getDependencies()) { 2386 if (llvm::is_contained(Defs, II)) 2387 DependenciesSatisfied = false; 2388 } 2389 if (!DependenciesSatisfied) { 2390 // Try the next one. 2391 ++I; 2392 continue; 2393 } 2394 2395 // Emit #endif/#if pair if needed. 2396 if ((*I)->getGuard() != InGuard) { 2397 if (!InGuard.empty()) 2398 OS << "#endif\n"; 2399 InGuard = (*I)->getGuard(); 2400 if (!InGuard.empty()) 2401 OS << "#if " << InGuard << "\n"; 2402 } 2403 2404 // Actually generate the intrinsic code. 2405 OS << (*I)->generate(); 2406 2407 MadeProgress = true; 2408 I = Defs.erase(I); 2409 } 2410 } 2411 assert(Defs.empty() && "Some requirements were not satisfied!"); 2412 if (!InGuard.empty()) 2413 OS << "#endif\n"; 2414 2415 OS << "\n"; 2416 OS << "#undef __ai\n\n"; 2417 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2418 OS << "#endif /* ifndef __ARM_FP */\n"; 2419 OS << "#endif /* __ARM_NEON_H */\n"; 2420 } 2421 2422 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2423 /// is comprised of type definitions and function declarations. 
2424 void NeonEmitter::runFP16(raw_ostream &OS) { 2425 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2426 "------------------------------" 2427 "---===\n" 2428 " *\n" 2429 " * Permission is hereby granted, free of charge, to any person " 2430 "obtaining a copy\n" 2431 " * of this software and associated documentation files (the " 2432 "\"Software\"), to deal\n" 2433 " * in the Software without restriction, including without limitation " 2434 "the rights\n" 2435 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2436 "and/or sell\n" 2437 " * copies of the Software, and to permit persons to whom the Software " 2438 "is\n" 2439 " * furnished to do so, subject to the following conditions:\n" 2440 " *\n" 2441 " * The above copyright notice and this permission notice shall be " 2442 "included in\n" 2443 " * all copies or substantial portions of the Software.\n" 2444 " *\n" 2445 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2446 "EXPRESS OR\n" 2447 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2448 "MERCHANTABILITY,\n" 2449 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2450 "SHALL THE\n" 2451 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2452 "OTHER\n" 2453 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2454 "ARISING FROM,\n" 2455 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2456 "DEALINGS IN\n" 2457 " * THE SOFTWARE.\n" 2458 " *\n" 2459 " *===-----------------------------------------------------------------" 2460 "---" 2461 "---===\n" 2462 " */\n\n"; 2463 2464 OS << "#ifndef __ARM_FP16_H\n"; 2465 OS << "#define __ARM_FP16_H\n\n"; 2466 2467 OS << "#include <stdint.h>\n\n"; 2468 2469 OS << "typedef __fp16 float16_t;\n"; 2470 2471 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2472 "__nodebug__))\n\n"; 2473 2474 SmallVector<Intrinsic *, 128> Defs; 2475 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2476 for (auto *R : RV) 2477 createIntrinsic(R, Defs); 2478 2479 for (auto *I : Defs) 2480 I->indexBody(); 2481 2482 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2483 2484 // Only emit a def when its requirements have been met. 2485 // FIXME: This loop could be made faster, but it's fast enough for now. 2486 bool MadeProgress = true; 2487 std::string InGuard; 2488 while (!Defs.empty() && MadeProgress) { 2489 MadeProgress = false; 2490 2491 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2492 I != Defs.end(); /*No step*/) { 2493 bool DependenciesSatisfied = true; 2494 for (auto *II : (*I)->getDependencies()) { 2495 if (llvm::is_contained(Defs, II)) 2496 DependenciesSatisfied = false; 2497 } 2498 if (!DependenciesSatisfied) { 2499 // Try the next one. 2500 ++I; 2501 continue; 2502 } 2503 2504 // Emit #endif/#if pair if needed. 2505 if ((*I)->getGuard() != InGuard) { 2506 if (!InGuard.empty()) 2507 OS << "#endif\n"; 2508 InGuard = (*I)->getGuard(); 2509 if (!InGuard.empty()) 2510 OS << "#if " << InGuard << "\n"; 2511 } 2512 2513 // Actually generate the intrinsic code. 
2514 OS << (*I)->generate(); 2515 2516 MadeProgress = true; 2517 I = Defs.erase(I); 2518 } 2519 } 2520 assert(Defs.empty() && "Some requirements were not satisfied!"); 2521 if (!InGuard.empty()) 2522 OS << "#endif\n"; 2523 2524 OS << "\n"; 2525 OS << "#undef __ai\n\n"; 2526 OS << "#endif /* __ARM_FP16_H */\n"; 2527 } 2528 2529 void NeonEmitter::runBF16(raw_ostream &OS) { 2530 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2531 "-----------------------------------===\n" 2532 " *\n" 2533 " *\n" 2534 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2535 "Exceptions.\n" 2536 " * See https://llvm.org/LICENSE.txt for license information.\n" 2537 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2538 " *\n" 2539 " *===-----------------------------------------------------------------" 2540 "------===\n" 2541 " */\n\n"; 2542 2543 OS << "#ifndef __ARM_BF16_H\n"; 2544 OS << "#define __ARM_BF16_H\n\n"; 2545 2546 OS << "typedef __bf16 bfloat16_t;\n"; 2547 2548 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2549 "__nodebug__))\n\n"; 2550 2551 SmallVector<Intrinsic *, 128> Defs; 2552 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2553 for (auto *R : RV) 2554 createIntrinsic(R, Defs); 2555 2556 for (auto *I : Defs) 2557 I->indexBody(); 2558 2559 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2560 2561 // Only emit a def when its requirements have been met. 2562 // FIXME: This loop could be made faster, but it's fast enough for now. 
2563 bool MadeProgress = true; 2564 std::string InGuard; 2565 while (!Defs.empty() && MadeProgress) { 2566 MadeProgress = false; 2567 2568 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2569 I != Defs.end(); /*No step*/) { 2570 bool DependenciesSatisfied = true; 2571 for (auto *II : (*I)->getDependencies()) { 2572 if (llvm::is_contained(Defs, II)) 2573 DependenciesSatisfied = false; 2574 } 2575 if (!DependenciesSatisfied) { 2576 // Try the next one. 2577 ++I; 2578 continue; 2579 } 2580 2581 // Emit #endif/#if pair if needed. 2582 if ((*I)->getGuard() != InGuard) { 2583 if (!InGuard.empty()) 2584 OS << "#endif\n"; 2585 InGuard = (*I)->getGuard(); 2586 if (!InGuard.empty()) 2587 OS << "#if " << InGuard << "\n"; 2588 } 2589 2590 // Actually generate the intrinsic code. 2591 OS << (*I)->generate(); 2592 2593 MadeProgress = true; 2594 I = Defs.erase(I); 2595 } 2596 } 2597 assert(Defs.empty() && "Some requirements were not satisfied!"); 2598 if (!InGuard.empty()) 2599 OS << "#endif\n"; 2600 2601 OS << "\n"; 2602 OS << "#undef __ai\n\n"; 2603 2604 OS << "#endif\n"; 2605 } 2606 2607 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2608 NeonEmitter(Records).run(OS); 2609 } 2610 2611 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2612 NeonEmitter(Records).runFP16(OS); 2613 } 2614 2615 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { 2616 NeonEmitter(Records).runBF16(OS); 2617 } 2618 2619 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2620 NeonEmitter(Records).runHeader(OS); 2621 } 2622 2623 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2624 llvm_unreachable("Neon test generation no longer implemented!"); 2625 } 2626