1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting arm_neon.h, which includes 10 // a declaration and definition of each function specified by the ARM NEON 11 // compiler interface. See ARM document DUI0348B. 12 // 13 // Each NEON instruction is implemented in terms of 1 or more functions which 14 // are suffixed with the element type of the input vectors. Functions may be 15 // implemented in terms of generic vector operations such as +, *, -, etc. or 16 // by calling a __builtin_-prefixed function which will be handled by clang's 17 // CodeGen library. 18 // 19 // Additional validation code can be generated by this file when runHeader() is 20 // called, rather than the normal run() entry point. 21 // 22 // See also the documentation in include/clang/Basic/arm_neon.td. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include "llvm/TableGen/SetTheory.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <utility> 52 #include <vector> 53 54 using namespace llvm; 55 56 namespace { 57 58 // While globals are generally bad, this one allows us to perform assertions 59 // liberally and somehow still trace them back to the def they indirectly 60 // came from. 61 static Record *CurrentRecord = nullptr; 62 static void assert_with_loc(bool Assertion, const std::string &Str) { 63 if (!Assertion) { 64 if (CurrentRecord) 65 PrintFatalError(CurrentRecord->getLoc(), Str); 66 else 67 PrintFatalError(Str); 68 } 69 } 70 71 enum ClassKind { 72 ClassNone, 73 ClassI, // generic integer instruction, e.g., "i8" suffix 74 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 75 ClassW, // width-specific instruction, e.g., "8" suffix 76 ClassB, // bitcast arguments with enum argument to specify type 77 ClassL, // Logical instructions which are op instructions 78 // but we need to not emit any suffix for in our 79 // tests. 80 ClassNoTest // Instructions which we do not test since they are 81 // not TRUE instructions. 82 }; 83 84 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 85 /// builtins. These must be kept in sync with the flags in 86 /// include/clang/Basic/TargetBuiltins.h. 87 namespace NeonTypeFlags { 88 89 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 90 91 enum EltType { 92 Int8, 93 Int16, 94 Int32, 95 Int64, 96 Poly8, 97 Poly16, 98 Poly64, 99 Poly128, 100 Float16, 101 Float32, 102 Float64, 103 BFloat16 104 }; 105 106 } // end namespace NeonTypeFlags 107 108 class NeonEmitter; 109 110 //===----------------------------------------------------------------------===// 111 // TypeSpec 112 //===----------------------------------------------------------------------===// 113 114 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 115 /// for strong typing purposes. 116 /// 117 /// A TypeSpec can be used to create a type. 118 class TypeSpec : public std::string { 119 public: 120 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 121 std::vector<TypeSpec> Ret; 122 TypeSpec Acc; 123 for (char I : Str.str()) { 124 if (islower(I)) { 125 Acc.push_back(I); 126 Ret.push_back(TypeSpec(Acc)); 127 Acc.clear(); 128 } else { 129 Acc.push_back(I); 130 } 131 } 132 return Ret; 133 } 134 }; 135 136 //===----------------------------------------------------------------------===// 137 // Type 138 //===----------------------------------------------------------------------===// 139 140 /// A Type. Not much more to say here. 141 class Type { 142 private: 143 TypeSpec TS; 144 145 enum TypeKind { 146 Void, 147 Float, 148 SInt, 149 UInt, 150 Poly, 151 BFloat16, 152 }; 153 TypeKind Kind; 154 bool Immediate, Constant, Pointer; 155 // ScalarForMangling and NoManglingQ are really not suited to live here as 156 // they are not related to the type. But they live in the TypeSpec (not the 157 // prototype), so this is really the only place to store them. 158 bool ScalarForMangling, NoManglingQ; 159 unsigned Bitwidth, ElementBitwidth, NumVectors; 160 161 public: 162 Type() 163 : Kind(Void), Immediate(false), Constant(false), 164 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 165 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 166 167 Type(TypeSpec TS, StringRef CharMods) 168 : TS(std::move(TS)), Kind(Void), Immediate(false), 169 Constant(false), Pointer(false), ScalarForMangling(false), 170 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 171 applyModifiers(CharMods); 172 } 173 174 /// Returns a type representing "void". 175 static Type getVoid() { return Type(); } 176 177 bool operator==(const Type &Other) const { return str() == Other.str(); } 178 bool operator!=(const Type &Other) const { return !operator==(Other); } 179 180 // 181 // Query functions 182 // 183 bool isScalarForMangling() const { return ScalarForMangling; } 184 bool noManglingQ() const { return NoManglingQ; } 185 186 bool isPointer() const { return Pointer; } 187 bool isValue() const { return !isVoid() && !isPointer(); } 188 bool isScalar() const { return isValue() && NumVectors == 0; } 189 bool isVector() const { return isValue() && NumVectors > 0; } 190 bool isConstPointer() const { return Constant; } 191 bool isFloating() const { return Kind == Float; } 192 bool isInteger() const { return Kind == SInt || Kind == UInt; } 193 bool isPoly() const { return Kind == Poly; } 194 bool isSigned() const { return Kind == SInt; } 195 bool isImmediate() const { return Immediate; } 196 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 197 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 198 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 199 bool isChar() const { return ElementBitwidth == 8; } 200 bool isShort() const { return isInteger() && ElementBitwidth == 16; } 201 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 202 bool isLong() const { return isInteger() && ElementBitwidth == 64; } 203 bool isVoid() const { return Kind == Void; } 204 bool isBFloat16() const { return Kind == BFloat16; } 205 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 206 unsigned getSizeInBits() const { return Bitwidth; } 207 unsigned getElementSizeInBits() const { return ElementBitwidth; } 208 unsigned getNumVectors() const { return NumVectors; } 209 210 // 211 // Mutator functions 212 // 213 void makeUnsigned() { 214 assert(!isVoid() && "not a potentially signed type"); 215 Kind = UInt; 216 } 217 void makeSigned() { 218 assert(!isVoid() && "not a potentially signed type"); 219 Kind = SInt; 220 } 221 222 void makeInteger(unsigned ElemWidth, bool Sign) { 223 assert(!isVoid() && "converting void to int probably not useful"); 224 Kind = Sign ? SInt : UInt; 225 Immediate = false; 226 ElementBitwidth = ElemWidth; 227 } 228 229 void makeImmediate(unsigned ElemWidth) { 230 Kind = SInt; 231 Immediate = true; 232 ElementBitwidth = ElemWidth; 233 } 234 235 void makeScalar() { 236 Bitwidth = ElementBitwidth; 237 NumVectors = 0; 238 } 239 240 void makeOneVector() { 241 assert(isVector()); 242 NumVectors = 1; 243 } 244 245 void make32BitElement() { 246 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 247 ElementBitwidth = 32; 248 } 249 250 void doubleLanes() { 251 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 252 Bitwidth = 128; 253 } 254 255 void halveLanes() { 256 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 257 Bitwidth = 64; 258 } 259 260 /// Return the C string representation of a type, which is the typename 261 /// defined in stdint.h or arm_neon.h. 262 std::string str() const; 263 264 /// Return the string representation of a type, which is an encoded 265 /// string for passing to the BUILTIN() macro in Builtins.def. 266 std::string builtin_str() const; 267 268 /// Return the value in NeonTypeFlags for this type. 269 unsigned getNeonEnum() const; 270 271 /// Parse a type from a stdint.h or arm_neon.h typedef name, 272 /// for example uint32x2_t or int64_t. 273 static Type fromTypedefName(StringRef Name); 274 275 private: 276 /// Creates the type based on the typespec string in TS. 277 /// Sets "Quad" to true if the "Q" or "H" modifiers were 278 /// seen. This is needed by applyModifier as some modifiers 279 /// only take effect if the type size was changed by "Q" or "H". 280 void applyTypespec(bool &Quad); 281 /// Applies prototype modifiers to the type. 282 void applyModifiers(StringRef Mods); 283 }; 284 285 //===----------------------------------------------------------------------===// 286 // Variable 287 //===----------------------------------------------------------------------===// 288 289 /// A variable is a simple class that just has a type and a name. 290 class Variable { 291 Type T; 292 std::string N; 293 294 public: 295 Variable() : T(Type::getVoid()) {} 296 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 297 298 Type getType() const { return T; } 299 std::string getName() const { return "__" + N; } 300 }; 301 302 //===----------------------------------------------------------------------===// 303 // Intrinsic 304 //===----------------------------------------------------------------------===// 305 306 /// The main grunt class. This represents an instantiation of an intrinsic with 307 /// a particular typespec and prototype. 308 class Intrinsic { 309 /// The Record this intrinsic was created from. 310 Record *R; 311 /// The unmangled name. 312 std::string Name; 313 /// The input and output typespecs. InTS == OutTS except when 314 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 315 TypeSpec OutTS, InTS; 316 /// The base class kind. Most intrinsics use ClassS, which has full type 317 /// info for integers (s32/u32). Some use ClassI, which doesn't care about 318 /// signedness (i32), while some (ClassB) have no type at all, only a width 319 /// (32). 320 ClassKind CK; 321 /// The list of DAGs for the body. May be empty, in which case we should 322 /// emit a builtin call. 323 ListInit *Body; 324 /// The architectural #ifdef guard. 325 std::string Guard; 326 /// Set if the Unavailable bit is 1. This means we don't generate a body, 327 /// just an "unavailable" attribute on a declaration. 328 bool IsUnavailable; 329 /// Is this intrinsic safe for big-endian? or does it need its arguments 330 /// reversing? 331 bool BigEndianSafe; 332 333 /// The types of return value [0] and parameters [1..]. 334 std::vector<Type> Types; 335 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 336 int PolymorphicKeyType; 337 /// The local variables defined. 338 std::map<std::string, Variable> Variables; 339 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 340 bool NeededEarly; 341 /// UseMacro - set if we should implement using a macro or unset for a 342 /// function. 343 bool UseMacro; 344 /// The set of intrinsics that this intrinsic uses/requires. 345 std::set<Intrinsic *> Dependencies; 346 /// The "base type", which is Type('d', OutTS). InBaseType is only 347 /// different if CartesianProductWith is non-empty (for vreinterpret). 348 Type BaseType, InBaseType; 349 /// The return variable. 350 Variable RetVar; 351 /// A postfix to apply to every variable. Defaults to "". 352 std::string VariablePostfix; 353 354 NeonEmitter &Emitter; 355 std::stringstream OS; 356 357 bool isBigEndianSafe() const { 358 if (BigEndianSafe) 359 return true; 360 361 for (const auto &T : Types){ 362 if (T.isVector() && T.getNumElements() > 1) 363 return false; 364 } 365 return true; 366 } 367 368 public: 369 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 370 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 371 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 372 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 373 Guard(Guard.str()), IsUnavailable(IsUnavailable), 374 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 375 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 376 Emitter(Emitter) { 377 // Modify the TypeSpec per-argument to get a concrete Type, and create 378 // known variables for each. 379 // Types[0] is the return value. 380 unsigned Pos = 0; 381 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 382 StringRef Mods = getNextModifiers(Proto, Pos); 383 while (!Mods.empty()) { 384 Types.emplace_back(InTS, Mods); 385 if (Mods.contains('!')) 386 PolymorphicKeyType = Types.size() - 1; 387 388 Mods = getNextModifiers(Proto, Pos); 389 } 390 391 for (auto Type : Types) { 392 // If this builtin takes an immediate argument, we need to #define it rather 393 // than use a standard declaration, so that SemaChecking can range check 394 // the immediate passed by the user. 395 396 // Pointer arguments need to use macros to avoid hiding aligned attributes 397 // from the pointer type. 398 399 // It is not permitted to pass or return an __fp16 by value, so intrinsics 400 // taking a scalar float16_t must be implemented as macros. 401 if (Type.isImmediate() || Type.isPointer() || 402 (Type.isScalar() && Type.isHalf())) 403 UseMacro = true; 404 } 405 } 406 407 /// Get the Record that this intrinsic is based off. 408 Record *getRecord() const { return R; } 409 /// Get the set of Intrinsics that this intrinsic calls. 410 /// this is the set of immediate dependencies, NOT the 411 /// transitive closure. 412 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 413 /// Get the architectural guard string (#ifdef). 414 std::string getGuard() const { return Guard; } 415 /// Get the non-mangled name. 416 std::string getName() const { return Name; } 417 418 /// Return true if the intrinsic takes an immediate operand. 419 bool hasImmediate() const { 420 return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); }); 421 } 422 423 /// Return the parameter index of the immediate operand. 424 unsigned getImmediateIdx() const { 425 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 426 if (Types[Idx].isImmediate()) 427 return Idx - 1; 428 llvm_unreachable("Intrinsic has no immediate"); 429 } 430 431 432 unsigned getNumParams() const { return Types.size() - 1; } 433 Type getReturnType() const { return Types[0]; } 434 Type getParamType(unsigned I) const { return Types[I + 1]; } 435 Type getBaseType() const { return BaseType; } 436 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 437 438 /// Return true if the prototype has a scalar argument. 439 bool protoHasScalar() const; 440 441 /// Return the index that parameter PIndex will sit at 442 /// in a generated function call. This is often just PIndex, 443 /// but may not be as things such as multiple-vector operands 444 /// and sret parameters need to be taken into accont. 445 unsigned getGeneratedParamIdx(unsigned PIndex) { 446 unsigned Idx = 0; 447 if (getReturnType().getNumVectors() > 1) 448 // Multiple vectors are passed as sret. 449 ++Idx; 450 451 for (unsigned I = 0; I < PIndex; ++I) 452 Idx += std::max(1U, getParamType(I).getNumVectors()); 453 454 return Idx; 455 } 456 457 bool hasBody() const { return Body && !Body->getValues().empty(); } 458 459 void setNeededEarly() { NeededEarly = true; } 460 461 bool operator<(const Intrinsic &Other) const { 462 // Sort lexicographically on a two-tuple (Guard, Name) 463 if (Guard != Other.Guard) 464 return Guard < Other.Guard; 465 return Name < Other.Name; 466 } 467 468 ClassKind getClassKind(bool UseClassBIfScalar = false) { 469 if (UseClassBIfScalar && !protoHasScalar()) 470 return ClassB; 471 return CK; 472 } 473 474 /// Return the name, mangled with type information. 475 /// If ForceClassS is true, use ClassS (u32/s32) instead 476 /// of the intrinsic's own type class. 477 std::string getMangledName(bool ForceClassS = false) const; 478 /// Return the type code for a builtin function call. 479 std::string getInstTypeCode(Type T, ClassKind CK) const; 480 /// Return the type string for a BUILTIN() macro in Builtins.def. 481 std::string getBuiltinTypeStr(); 482 483 /// Generate the intrinsic, returning code. 484 std::string generate(); 485 /// Perform type checking and populate the dependency graph, but 486 /// don't generate code yet. 487 void indexBody(); 488 489 private: 490 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 491 492 std::string mangleName(std::string Name, ClassKind CK) const; 493 494 void initVariables(); 495 std::string replaceParamsIn(std::string S); 496 497 void emitBodyAsBuiltinCall(); 498 499 void generateImpl(bool ReverseArguments, 500 StringRef NamePrefix, StringRef CallPrefix); 501 void emitReturn(); 502 void emitBody(StringRef CallPrefix); 503 void emitShadowedArgs(); 504 void emitArgumentReversal(); 505 void emitReturnVarDecl(); 506 void emitReturnReversal(); 507 void emitReverseVariable(Variable &Dest, Variable &Src); 508 void emitNewLine(); 509 void emitClosingBrace(); 510 void emitOpeningBrace(); 511 void emitPrototype(StringRef NamePrefix); 512 513 class DagEmitter { 514 Intrinsic &Intr; 515 StringRef CallPrefix; 516 517 public: 518 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 519 Intr(Intr), CallPrefix(CallPrefix) { 520 } 521 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 522 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 523 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 524 std::pair<Type, std::string> emitDagDup(DagInit *DI); 525 std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); 526 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 527 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 528 std::pair<Type, std::string> emitDagCall(DagInit *DI, 529 bool MatchMangledName); 530 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 531 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 532 std::pair<Type, std::string> emitDagOp(DagInit *DI); 533 std::pair<Type, std::string> emitDag(DagInit *DI); 534 }; 535 }; 536 537 //===----------------------------------------------------------------------===// 538 // NeonEmitter 539 //===----------------------------------------------------------------------===// 540 541 class NeonEmitter { 542 RecordKeeper &Records; 543 DenseMap<Record *, ClassKind> ClassMap; 544 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 545 unsigned UniqueNumber; 546 547 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 548 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 549 void genOverloadTypeCheckCode(raw_ostream &OS, 550 SmallVectorImpl<Intrinsic *> &Defs); 551 void genIntrinsicRangeCheckCode(raw_ostream &OS, 552 SmallVectorImpl<Intrinsic *> &Defs); 553 554 public: 555 /// Called by Intrinsic - this attempts to get an intrinsic that takes 556 /// the given types as arguments. 557 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 558 Optional<std::string> MangledName); 559 560 /// Called by Intrinsic - returns a globally-unique number. 561 unsigned getUniqueNumber() { return UniqueNumber++; } 562 563 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 564 Record *SI = R.getClass("SInst"); 565 Record *II = R.getClass("IInst"); 566 Record *WI = R.getClass("WInst"); 567 Record *SOpI = R.getClass("SOpInst"); 568 Record *IOpI = R.getClass("IOpInst"); 569 Record *WOpI = R.getClass("WOpInst"); 570 Record *LOpI = R.getClass("LOpInst"); 571 Record *NoTestOpI = R.getClass("NoTestOpInst"); 572 573 ClassMap[SI] = ClassS; 574 ClassMap[II] = ClassI; 575 ClassMap[WI] = ClassW; 576 ClassMap[SOpI] = ClassS; 577 ClassMap[IOpI] = ClassI; 578 ClassMap[WOpI] = ClassW; 579 ClassMap[LOpI] = ClassL; 580 ClassMap[NoTestOpI] = ClassNoTest; 581 } 582 583 // Emit arm_neon.h.inc 584 void run(raw_ostream &o); 585 586 // Emit arm_fp16.h.inc 587 void runFP16(raw_ostream &o); 588 589 // Emit arm_bf16.h.inc 590 void runBF16(raw_ostream &o); 591 592 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 593 // arm_bf16.h 594 void runHeader(raw_ostream &o); 595 }; 596 597 } // end anonymous namespace 598 599 //===----------------------------------------------------------------------===// 600 // Type implementation 601 //===----------------------------------------------------------------------===// 602 603 std::string Type::str() const { 604 if (isVoid()) 605 return "void"; 606 std::string S; 607 608 if (isInteger() && !isSigned()) 609 S += "u"; 610 611 if (isPoly()) 612 S += "poly"; 613 else if (isFloating()) 614 S += "float"; 615 else if (isBFloat16()) 616 S += "bfloat"; 617 else 618 S += "int"; 619 620 S += utostr(ElementBitwidth); 621 if (isVector()) 622 S += "x" + utostr(getNumElements()); 623 if (NumVectors > 1) 624 S += "x" + utostr(NumVectors); 625 S += "_t"; 626 627 if (Constant) 628 S += " const"; 629 if (Pointer) 630 S += " *"; 631 632 return S; 633 } 634 635 std::string Type::builtin_str() const { 636 std::string S; 637 if (isVoid()) 638 return "v"; 639 640 if (isPointer()) { 641 // All pointers are void pointers. 642 S = "v"; 643 if (isConstPointer()) 644 S += "C"; 645 S += "*"; 646 return S; 647 } else if (isInteger()) 648 switch (ElementBitwidth) { 649 case 8: S += "c"; break; 650 case 16: S += "s"; break; 651 case 32: S += "i"; break; 652 case 64: S += "Wi"; break; 653 case 128: S += "LLLi"; break; 654 default: llvm_unreachable("Unhandled case!"); 655 } 656 else if (isBFloat16()) { 657 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 658 S += "y"; 659 } else 660 switch (ElementBitwidth) { 661 case 16: S += "h"; break; 662 case 32: S += "f"; break; 663 case 64: S += "d"; break; 664 default: llvm_unreachable("Unhandled case!"); 665 } 666 667 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 668 if (isChar() && !isPointer() && isSigned()) 669 // Make chars explicitly signed. 670 S = "S" + S; 671 else if (isInteger() && !isSigned()) 672 S = "U" + S; 673 674 // Constant indices are "int", but have the "constant expression" modifier. 675 if (isImmediate()) { 676 assert(isInteger() && isSigned()); 677 S = "I" + S; 678 } 679 680 if (isScalar()) 681 return S; 682 683 std::string Ret; 684 for (unsigned I = 0; I < NumVectors; ++I) 685 Ret += "V" + utostr(getNumElements()) + S; 686 687 return Ret; 688 } 689 690 unsigned Type::getNeonEnum() const { 691 unsigned Addend; 692 switch (ElementBitwidth) { 693 case 8: Addend = 0; break; 694 case 16: Addend = 1; break; 695 case 32: Addend = 2; break; 696 case 64: Addend = 3; break; 697 case 128: Addend = 4; break; 698 default: llvm_unreachable("Unhandled element bitwidth!"); 699 } 700 701 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 702 if (isPoly()) { 703 // Adjustment needed because Poly32 doesn't exist. 704 if (Addend >= 2) 705 --Addend; 706 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 707 } 708 if (isFloating()) { 709 assert(Addend != 0 && "Float8 doesn't exist!"); 710 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 711 } 712 713 if (isBFloat16()) { 714 assert(Addend == 1 && "BFloat16 is only 16 bit"); 715 Base = (unsigned)NeonTypeFlags::BFloat16; 716 } 717 718 if (Bitwidth == 128) 719 Base |= (unsigned)NeonTypeFlags::QuadFlag; 720 if (isInteger() && !isSigned()) 721 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 722 723 return Base; 724 } 725 726 Type Type::fromTypedefName(StringRef Name) { 727 Type T; 728 T.Kind = SInt; 729 730 if (Name.front() == 'u') { 731 T.Kind = UInt; 732 Name = Name.drop_front(); 733 } 734 735 if (Name.startswith("float")) { 736 T.Kind = Float; 737 Name = Name.drop_front(5); 738 } else if (Name.startswith("poly")) { 739 T.Kind = Poly; 740 Name = Name.drop_front(4); 741 } else if (Name.startswith("bfloat")) { 742 T.Kind = BFloat16; 743 Name = Name.drop_front(6); 744 } else { 745 assert(Name.startswith("int")); 746 Name = Name.drop_front(3); 747 } 748 749 unsigned I = 0; 750 for (I = 0; I < Name.size(); ++I) { 751 if (!isdigit(Name[I])) 752 break; 753 } 754 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 755 Name = Name.drop_front(I); 756 757 T.Bitwidth = T.ElementBitwidth; 758 T.NumVectors = 1; 759 760 if (Name.front() == 'x') { 761 Name = Name.drop_front(); 762 unsigned I = 0; 763 for (I = 0; I < Name.size(); ++I) { 764 if (!isdigit(Name[I])) 765 break; 766 } 767 unsigned NumLanes; 768 Name.substr(0, I).getAsInteger(10, NumLanes); 769 Name = Name.drop_front(I); 770 T.Bitwidth = T.ElementBitwidth * NumLanes; 771 } else { 772 // Was scalar. 773 T.NumVectors = 0; 774 } 775 if (Name.front() == 'x') { 776 Name = Name.drop_front(); 777 unsigned I = 0; 778 for (I = 0; I < Name.size(); ++I) { 779 if (!isdigit(Name[I])) 780 break; 781 } 782 Name.substr(0, I).getAsInteger(10, T.NumVectors); 783 Name = Name.drop_front(I); 784 } 785 786 assert(Name.startswith("_t") && "Malformed typedef!"); 787 return T; 788 } 789 790 void Type::applyTypespec(bool &Quad) { 791 std::string S = TS; 792 ScalarForMangling = false; 793 Kind = SInt; 794 ElementBitwidth = ~0U; 795 NumVectors = 1; 796 797 for (char I : S) { 798 switch (I) { 799 case 'S': 800 ScalarForMangling = true; 801 break; 802 case 'H': 803 NoManglingQ = true; 804 Quad = true; 805 break; 806 case 'Q': 807 Quad = true; 808 break; 809 case 'P': 810 Kind = Poly; 811 break; 812 case 'U': 813 Kind = UInt; 814 break; 815 case 'c': 816 ElementBitwidth = 8; 817 break; 818 case 'h': 819 Kind = Float; 820 LLVM_FALLTHROUGH; 821 case 's': 822 ElementBitwidth = 16; 823 break; 824 case 'f': 825 Kind = Float; 826 LLVM_FALLTHROUGH; 827 case 'i': 828 ElementBitwidth = 32; 829 break; 830 case 'd': 831 Kind = Float; 832 LLVM_FALLTHROUGH; 833 case 'l': 834 ElementBitwidth = 64; 835 break; 836 case 'k': 837 ElementBitwidth = 128; 838 // Poly doesn't have a 128x1 type. 839 if (isPoly()) 840 NumVectors = 0; 841 break; 842 case 'b': 843 Kind = BFloat16; 844 ElementBitwidth = 16; 845 break; 846 default: 847 llvm_unreachable("Unhandled type code!"); 848 } 849 } 850 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 851 852 Bitwidth = Quad ? 128 : 64; 853 } 854 855 void Type::applyModifiers(StringRef Mods) { 856 bool AppliedQuad = false; 857 applyTypespec(AppliedQuad); 858 859 for (char Mod : Mods) { 860 switch (Mod) { 861 case '.': 862 break; 863 case 'v': 864 Kind = Void; 865 break; 866 case 'S': 867 Kind = SInt; 868 break; 869 case 'U': 870 Kind = UInt; 871 break; 872 case 'B': 873 Kind = BFloat16; 874 ElementBitwidth = 16; 875 break; 876 case 'F': 877 Kind = Float; 878 break; 879 case 'P': 880 Kind = Poly; 881 break; 882 case '>': 883 assert(ElementBitwidth < 128); 884 ElementBitwidth *= 2; 885 break; 886 case '<': 887 assert(ElementBitwidth > 8); 888 ElementBitwidth /= 2; 889 break; 890 case '1': 891 NumVectors = 0; 892 break; 893 case '2': 894 NumVectors = 2; 895 break; 896 case '3': 897 NumVectors = 3; 898 break; 899 case '4': 900 NumVectors = 4; 901 break; 902 case '*': 903 Pointer = true; 904 break; 905 case 'c': 906 Constant = true; 907 break; 908 case 'Q': 909 Bitwidth = 128; 910 break; 911 case 'q': 912 Bitwidth = 64; 913 break; 914 case 'I': 915 Kind = SInt; 916 ElementBitwidth = Bitwidth = 32; 917 NumVectors = 0; 918 Immediate = true; 919 break; 920 case 'p': 921 if (isPoly()) 922 Kind = UInt; 923 break; 924 case '!': 925 // Key type, handled elsewhere. 926 break; 927 default: 928 llvm_unreachable("Unhandled character!"); 929 } 930 } 931 } 932 933 //===----------------------------------------------------------------------===// 934 // Intrinsic implementation 935 //===----------------------------------------------------------------------===// 936 937 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 938 if (Proto.size() == Pos) 939 return StringRef(); 940 else if (Proto[Pos] != '(') 941 return Proto.substr(Pos++, 1); 942 943 size_t Start = Pos + 1; 944 size_t End = Proto.find(')', Start); 945 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 946 Pos = End + 1; 947 return Proto.slice(Start, End); 948 } 949 950 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 951 char typeCode = '\0'; 952 bool printNumber = true; 953 954 if (CK == ClassB) 955 return ""; 956 957 if (T.isBFloat16()) 958 return "bf16"; 959 960 if (T.isPoly()) 961 typeCode = 'p'; 962 else if (T.isInteger()) 963 typeCode = T.isSigned() ? 's' : 'u'; 964 else 965 typeCode = 'f'; 966 967 if (CK == ClassI) { 968 switch (typeCode) { 969 default: 970 break; 971 case 's': 972 case 'u': 973 case 'p': 974 typeCode = 'i'; 975 break; 976 } 977 } 978 if (CK == ClassB) { 979 typeCode = '\0'; 980 } 981 982 std::string S; 983 if (typeCode != '\0') 984 S.push_back(typeCode); 985 if (printNumber) 986 S += utostr(T.getElementSizeInBits()); 987 988 return S; 989 } 990 991 std::string Intrinsic::getBuiltinTypeStr() { 992 ClassKind LocalCK = getClassKind(true); 993 std::string S; 994 995 Type RetT = getReturnType(); 996 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 997 !RetT.isFloating() && !RetT.isBFloat16()) 998 RetT.makeInteger(RetT.getElementSizeInBits(), false); 999 1000 // Since the return value must be one type, return a vector type of the 1001 // appropriate width which we will bitcast. An exception is made for 1002 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1003 // fashion, storing them to a pointer arg. 1004 if (RetT.getNumVectors() > 1) { 1005 S += "vv*"; // void result with void* first argument 1006 } else { 1007 if (RetT.isPoly()) 1008 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1009 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1010 RetT.makeSigned(); 1011 1012 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1013 // Cast to vector of 8-bit elements. 1014 RetT.makeInteger(8, true); 1015 1016 S += RetT.builtin_str(); 1017 } 1018 1019 for (unsigned I = 0; I < getNumParams(); ++I) { 1020 Type T = getParamType(I); 1021 if (T.isPoly()) 1022 T.makeInteger(T.getElementSizeInBits(), false); 1023 1024 if (LocalCK == ClassB && !T.isScalar()) 1025 T.makeInteger(8, true); 1026 // Halves always get converted to 8-bit elements. 1027 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1028 T.makeInteger(8, true); 1029 1030 if (LocalCK == ClassI && T.isInteger()) 1031 T.makeSigned(); 1032 1033 if (hasImmediate() && getImmediateIdx() == I) 1034 T.makeImmediate(32); 1035 1036 S += T.builtin_str(); 1037 } 1038 1039 // Extra constant integer to hold type class enum for this function, e.g. s8 1040 if (LocalCK == ClassB) 1041 S += "i"; 1042 1043 return S; 1044 } 1045 1046 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1047 // Check if the prototype has a scalar operand with the type of the vector 1048 // elements. If not, bitcasting the args will take care of arg checking. 1049 // The actual signedness etc. will be taken care of with special enums. 1050 ClassKind LocalCK = CK; 1051 if (!protoHasScalar()) 1052 LocalCK = ClassB; 1053 1054 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1055 } 1056 1057 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1058 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1059 std::string S = Name; 1060 1061 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1062 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1063 Name == "vcvt_f32_bf16") 1064 return Name; 1065 1066 if (!typeCode.empty()) { 1067 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1068 if (Name.size() >= 3 && isdigit(Name.back()) && 1069 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1070 S.insert(S.length() - 3, "_" + typeCode); 1071 else 1072 S += "_" + typeCode; 1073 } 1074 1075 if (BaseType != InBaseType) { 1076 // A reinterpret - out the input base type at the end. 1077 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1078 } 1079 1080 if (LocalCK == ClassB) 1081 S += "_v"; 1082 1083 // Insert a 'q' before the first '_' character so that it ends up before 1084 // _lane or _n on vector-scalar operations. 1085 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1086 size_t Pos = S.find('_'); 1087 S.insert(Pos, "q"); 1088 } 1089 1090 char Suffix = '\0'; 1091 if (BaseType.isScalarForMangling()) { 1092 switch (BaseType.getElementSizeInBits()) { 1093 case 8: Suffix = 'b'; break; 1094 case 16: Suffix = 'h'; break; 1095 case 32: Suffix = 's'; break; 1096 case 64: Suffix = 'd'; break; 1097 default: llvm_unreachable("Bad suffix!"); 1098 } 1099 } 1100 if (Suffix != '\0') { 1101 size_t Pos = S.find('_'); 1102 S.insert(Pos, &Suffix, 1); 1103 } 1104 1105 return S; 1106 } 1107 1108 std::string Intrinsic::replaceParamsIn(std::string S) { 1109 while (S.find('$') != std::string::npos) { 1110 size_t Pos = S.find('$'); 1111 size_t End = Pos + 1; 1112 while (isalpha(S[End])) 1113 ++End; 1114 1115 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1116 assert_with_loc(Variables.find(VarName) != Variables.end(), 1117 "Variable not defined!"); 1118 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1119 } 1120 1121 return S; 1122 } 1123 1124 void Intrinsic::initVariables() { 1125 Variables.clear(); 1126 1127 // Modify the TypeSpec per-argument to get a concrete Type, and create 1128 // known variables for each. 1129 for (unsigned I = 1; I < Types.size(); ++I) { 1130 char NameC = '0' + (I - 1); 1131 std::string Name = "p"; 1132 Name.push_back(NameC); 1133 1134 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1135 } 1136 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1137 } 1138 1139 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1140 if (UseMacro) 1141 OS << "#define "; 1142 else 1143 OS << "__ai " << Types[0].str() << " "; 1144 1145 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1146 1147 for (unsigned I = 0; I < getNumParams(); ++I) { 1148 if (I != 0) 1149 OS << ", "; 1150 1151 char NameC = '0' + I; 1152 std::string Name = "p"; 1153 Name.push_back(NameC); 1154 assert(Variables.find(Name) != Variables.end()); 1155 Variable &V = Variables[Name]; 1156 1157 if (!UseMacro) 1158 OS << V.getType().str() << " "; 1159 OS << V.getName(); 1160 } 1161 1162 OS << ")"; 1163 } 1164 1165 void Intrinsic::emitOpeningBrace() { 1166 if (UseMacro) 1167 OS << " __extension__ ({"; 1168 else 1169 OS << " {"; 1170 emitNewLine(); 1171 } 1172 1173 void Intrinsic::emitClosingBrace() { 1174 if (UseMacro) 1175 OS << "})"; 1176 else 1177 OS << "}"; 1178 } 1179 1180 void Intrinsic::emitNewLine() { 1181 if (UseMacro) 1182 OS << " \\\n"; 1183 else 1184 OS << "\n"; 1185 } 1186 1187 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1188 if (Dest.getType().getNumVectors() > 1) { 1189 emitNewLine(); 1190 1191 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1192 OS << " " << Dest.getName() << ".val[" << K << "] = " 1193 << "__builtin_shufflevector(" 1194 << Src.getName() << ".val[" << K << "], " 1195 << Src.getName() << ".val[" << K << "]"; 1196 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1197 OS << ", " << J; 1198 OS << ");"; 1199 emitNewLine(); 1200 } 1201 } else { 1202 OS << " " << Dest.getName() 1203 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1204 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1205 OS << ", " << J; 1206 OS << ");"; 1207 emitNewLine(); 1208 } 1209 } 1210 1211 void Intrinsic::emitArgumentReversal() { 1212 if (isBigEndianSafe()) 1213 return; 1214 1215 // Reverse all vector arguments. 1216 for (unsigned I = 0; I < getNumParams(); ++I) { 1217 std::string Name = "p" + utostr(I); 1218 std::string NewName = "rev" + utostr(I); 1219 1220 Variable &V = Variables[Name]; 1221 Variable NewV(V.getType(), NewName + VariablePostfix); 1222 1223 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1224 continue; 1225 1226 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1227 emitReverseVariable(NewV, V); 1228 V = NewV; 1229 } 1230 } 1231 1232 void Intrinsic::emitReturnVarDecl() { 1233 assert(RetVar.getType() == Types[0]); 1234 // Create a return variable, if we're not void. 1235 if (!RetVar.getType().isVoid()) { 1236 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1237 emitNewLine(); 1238 } 1239 } 1240 1241 void Intrinsic::emitReturnReversal() { 1242 if (isBigEndianSafe()) 1243 return; 1244 if (!getReturnType().isVector() || getReturnType().isVoid() || 1245 getReturnType().getNumElements() == 1) 1246 return; 1247 emitReverseVariable(RetVar, RetVar); 1248 } 1249 1250 void Intrinsic::emitShadowedArgs() { 1251 // Macro arguments are not type-checked like inline function arguments, 1252 // so assign them to local temporaries to get the right type checking. 1253 if (!UseMacro) 1254 return; 1255 1256 for (unsigned I = 0; I < getNumParams(); ++I) { 1257 // Do not create a temporary for an immediate argument. 1258 // That would defeat the whole point of using a macro! 1259 if (getParamType(I).isImmediate()) 1260 continue; 1261 // Do not create a temporary for pointer arguments. The input 1262 // pointer may have an alignment hint. 1263 if (getParamType(I).isPointer()) 1264 continue; 1265 1266 std::string Name = "p" + utostr(I); 1267 1268 assert(Variables.find(Name) != Variables.end()); 1269 Variable &V = Variables[Name]; 1270 1271 std::string NewName = "s" + utostr(I); 1272 Variable V2(V.getType(), NewName + VariablePostfix); 1273 1274 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1275 << V.getName() << ";"; 1276 emitNewLine(); 1277 1278 V = V2; 1279 } 1280 } 1281 1282 bool Intrinsic::protoHasScalar() const { 1283 return llvm::any_of( 1284 Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); }); 1285 } 1286 1287 void Intrinsic::emitBodyAsBuiltinCall() { 1288 std::string S; 1289 1290 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1291 // sret-like argument. 1292 bool SRet = getReturnType().getNumVectors() >= 2; 1293 1294 StringRef N = Name; 1295 ClassKind LocalCK = CK; 1296 if (!protoHasScalar()) 1297 LocalCK = ClassB; 1298 1299 if (!getReturnType().isVoid() && !SRet) 1300 S += "(" + RetVar.getType().str() + ") "; 1301 1302 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1303 1304 if (SRet) 1305 S += "&" + RetVar.getName() + ", "; 1306 1307 for (unsigned I = 0; I < getNumParams(); ++I) { 1308 Variable &V = Variables["p" + utostr(I)]; 1309 Type T = V.getType(); 1310 1311 // Handle multiple-vector values specially, emitting each subvector as an 1312 // argument to the builtin. 1313 if (T.getNumVectors() > 1) { 1314 // Check if an explicit cast is needed. 1315 std::string Cast; 1316 if (LocalCK == ClassB) { 1317 Type T2 = T; 1318 T2.makeOneVector(); 1319 T2.makeInteger(8, /*Sign=*/true); 1320 Cast = "(" + T2.str() + ")"; 1321 } 1322 1323 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1324 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1325 continue; 1326 } 1327 1328 std::string Arg = V.getName(); 1329 Type CastToType = T; 1330 1331 // Check if an explicit cast is needed. 1332 if (CastToType.isVector() && 1333 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1334 CastToType.makeInteger(8, true); 1335 Arg = "(" + CastToType.str() + ")" + Arg; 1336 } else if (CastToType.isVector() && LocalCK == ClassI) { 1337 if (CastToType.isInteger()) 1338 CastToType.makeSigned(); 1339 Arg = "(" + CastToType.str() + ")" + Arg; 1340 } 1341 1342 S += Arg + ", "; 1343 } 1344 1345 // Extra constant integer to hold type class enum for this function, e.g. s8 1346 if (getClassKind(true) == ClassB) { 1347 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1348 } else { 1349 // Remove extraneous ", ". 1350 S.pop_back(); 1351 S.pop_back(); 1352 } 1353 S += ");"; 1354 1355 std::string RetExpr; 1356 if (!SRet && !RetVar.getType().isVoid()) 1357 RetExpr = RetVar.getName() + " = "; 1358 1359 OS << " " << RetExpr << S; 1360 emitNewLine(); 1361 } 1362 1363 void Intrinsic::emitBody(StringRef CallPrefix) { 1364 std::vector<std::string> Lines; 1365 1366 if (!Body || Body->getValues().empty()) { 1367 // Nothing specific to output - must output a builtin. 1368 emitBodyAsBuiltinCall(); 1369 return; 1370 } 1371 1372 // We have a list of "things to output". The last should be returned. 1373 for (auto *I : Body->getValues()) { 1374 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1375 Lines.push_back(replaceParamsIn(SI->getAsString())); 1376 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1377 DagEmitter DE(*this, CallPrefix); 1378 Lines.push_back(DE.emitDag(DI).second + ";"); 1379 } 1380 } 1381 1382 assert(!Lines.empty() && "Empty def?"); 1383 if (!RetVar.getType().isVoid()) 1384 Lines.back().insert(0, RetVar.getName() + " = "); 1385 1386 for (auto &L : Lines) { 1387 OS << " " << L; 1388 emitNewLine(); 1389 } 1390 } 1391 1392 void Intrinsic::emitReturn() { 1393 if (RetVar.getType().isVoid()) 1394 return; 1395 if (UseMacro) 1396 OS << " " << RetVar.getName() << ";"; 1397 else 1398 OS << " return " << RetVar.getName() << ";"; 1399 emitNewLine(); 1400 } 1401 1402 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1403 // At this point we should only be seeing a def. 1404 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1405 std::string Op = DefI->getAsString(); 1406 1407 if (Op == "cast" || Op == "bitcast") 1408 return emitDagCast(DI, Op == "bitcast"); 1409 if (Op == "shuffle") 1410 return emitDagShuffle(DI); 1411 if (Op == "dup") 1412 return emitDagDup(DI); 1413 if (Op == "dup_typed") 1414 return emitDagDupTyped(DI); 1415 if (Op == "splat") 1416 return emitDagSplat(DI); 1417 if (Op == "save_temp") 1418 return emitDagSaveTemp(DI); 1419 if (Op == "op") 1420 return emitDagOp(DI); 1421 if (Op == "call" || Op == "call_mangled") 1422 return emitDagCall(DI, Op == "call_mangled"); 1423 if (Op == "name_replace") 1424 return emitDagNameReplace(DI); 1425 if (Op == "literal") 1426 return emitDagLiteral(DI); 1427 assert_with_loc(false, "Unknown operation!"); 1428 return std::make_pair(Type::getVoid(), ""); 1429 } 1430 1431 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1432 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1433 if (DI->getNumArgs() == 2) { 1434 // Unary op. 1435 std::pair<Type, std::string> R = 1436 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1437 return std::make_pair(R.first, Op + R.second); 1438 } else { 1439 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1440 std::pair<Type, std::string> R1 = 1441 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1442 std::pair<Type, std::string> R2 = 1443 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1444 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1445 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1446 } 1447 } 1448 1449 std::pair<Type, std::string> 1450 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1451 std::vector<Type> Types; 1452 std::vector<std::string> Values; 1453 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1454 std::pair<Type, std::string> R = 1455 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1456 Types.push_back(R.first); 1457 Values.push_back(R.second); 1458 } 1459 1460 // Look up the called intrinsic. 1461 std::string N; 1462 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1463 N = SI->getAsUnquotedString(); 1464 else 1465 N = emitDagArg(DI->getArg(0), "").second; 1466 Optional<std::string> MangledName; 1467 if (MatchMangledName) { 1468 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1469 N += "q"; 1470 MangledName = Intr.mangleName(N, ClassS); 1471 } 1472 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1473 1474 // Make sure the callee is known as an early def. 1475 Callee.setNeededEarly(); 1476 Intr.Dependencies.insert(&Callee); 1477 1478 // Now create the call itself. 1479 std::string S; 1480 if (!Callee.isBigEndianSafe()) 1481 S += CallPrefix.str(); 1482 S += Callee.getMangledName(true) + "("; 1483 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1484 if (I != 0) 1485 S += ", "; 1486 S += Values[I]; 1487 } 1488 S += ")"; 1489 1490 return std::make_pair(Callee.getReturnType(), S); 1491 } 1492 1493 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1494 bool IsBitCast){ 1495 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1496 std::pair<Type, std::string> R = 1497 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1498 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1499 Type castToType = R.first; 1500 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1501 1502 // MOD can take several forms: 1503 // 1. $X - take the type of parameter / variable X. 1504 // 2. The value "R" - take the type of the return type. 1505 // 3. a type string 1506 // 4. The value "U" or "S" to switch the signedness. 1507 // 5. The value "H" or "D" to half or double the bitwidth. 1508 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 1509 if (!DI->getArgNameStr(ArgIdx).empty()) { 1510 assert_with_loc(Intr.Variables.find(std::string( 1511 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1512 "Variable not found"); 1513 castToType = 1514 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1515 } else { 1516 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1517 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1518 1519 if (SI->getAsUnquotedString() == "R") { 1520 castToType = Intr.getReturnType(); 1521 } else if (SI->getAsUnquotedString() == "U") { 1522 castToType.makeUnsigned(); 1523 } else if (SI->getAsUnquotedString() == "S") { 1524 castToType.makeSigned(); 1525 } else if (SI->getAsUnquotedString() == "H") { 1526 castToType.halveLanes(); 1527 } else if (SI->getAsUnquotedString() == "D") { 1528 castToType.doubleLanes(); 1529 } else if (SI->getAsUnquotedString() == "8") { 1530 castToType.makeInteger(8, true); 1531 } else if (SI->getAsUnquotedString() == "32") { 1532 castToType.make32BitElement(); 1533 } else { 1534 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1535 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1536 } 1537 } 1538 } 1539 1540 std::string S; 1541 if (IsBitCast) { 1542 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1543 // a temporary. 1544 std::string N = "reint"; 1545 unsigned I = 0; 1546 while (Intr.Variables.find(N) != Intr.Variables.end()) 1547 N = "reint" + utostr(++I); 1548 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1549 1550 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1551 << R.second << ";"; 1552 Intr.emitNewLine(); 1553 1554 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1555 } else { 1556 // Emit a normal (static) cast. 1557 S = "(" + castToType.str() + ")(" + R.second + ")"; 1558 } 1559 1560 return std::make_pair(castToType, S); 1561 } 1562 1563 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1564 // See the documentation in arm_neon.td for a description of these operators. 1565 class LowHalf : public SetTheory::Operator { 1566 public: 1567 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1568 ArrayRef<SMLoc> Loc) override { 1569 SetTheory::RecSet Elts2; 1570 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1571 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1572 } 1573 }; 1574 1575 class HighHalf : public SetTheory::Operator { 1576 public: 1577 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1578 ArrayRef<SMLoc> Loc) override { 1579 SetTheory::RecSet Elts2; 1580 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1581 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1582 } 1583 }; 1584 1585 class Rev : public SetTheory::Operator { 1586 unsigned ElementSize; 1587 1588 public: 1589 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1590 1591 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1592 ArrayRef<SMLoc> Loc) override { 1593 SetTheory::RecSet Elts2; 1594 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1595 1596 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1597 VectorSize /= ElementSize; 1598 1599 std::vector<Record *> Revved; 1600 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1601 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1602 Revved.push_back(Elts2[VI + LI]); 1603 } 1604 } 1605 1606 Elts.insert(Revved.begin(), Revved.end()); 1607 } 1608 }; 1609 1610 class MaskExpander : public SetTheory::Expander { 1611 unsigned N; 1612 1613 public: 1614 MaskExpander(unsigned N) : N(N) {} 1615 1616 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1617 unsigned Addend = 0; 1618 if (R->getName() == "mask0") 1619 Addend = 0; 1620 else if (R->getName() == "mask1") 1621 Addend = N; 1622 else 1623 return; 1624 for (unsigned I = 0; I < N; ++I) 1625 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1626 } 1627 }; 1628 1629 // (shuffle arg1, arg2, sequence) 1630 std::pair<Type, std::string> Arg1 = 1631 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1632 std::pair<Type, std::string> Arg2 = 1633 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1634 assert_with_loc(Arg1.first == Arg2.first, 1635 "Different types in arguments to shuffle!"); 1636 1637 SetTheory ST; 1638 SetTheory::RecSet Elts; 1639 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1640 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1641 ST.addOperator("rev", 1642 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1643 ST.addExpander("MaskExpand", 1644 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1645 ST.evaluate(DI->getArg(2), Elts, None); 1646 1647 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1648 for (auto &E : Elts) { 1649 StringRef Name = E->getName(); 1650 assert_with_loc(Name.startswith("sv"), 1651 "Incorrect element kind in shuffle mask!"); 1652 S += ", " + Name.drop_front(2).str(); 1653 } 1654 S += ")"; 1655 1656 // Recalculate the return type - the shuffle may have halved or doubled it. 1657 Type T(Arg1.first); 1658 if (Elts.size() > T.getNumElements()) { 1659 assert_with_loc( 1660 Elts.size() == T.getNumElements() * 2, 1661 "Can only double or half the number of elements in a shuffle!"); 1662 T.doubleLanes(); 1663 } else if (Elts.size() < T.getNumElements()) { 1664 assert_with_loc( 1665 Elts.size() == T.getNumElements() / 2, 1666 "Can only double or half the number of elements in a shuffle!"); 1667 T.halveLanes(); 1668 } 1669 1670 return std::make_pair(T, S); 1671 } 1672 1673 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1674 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1675 std::pair<Type, std::string> A = 1676 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1677 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1678 1679 Type T = Intr.getBaseType(); 1680 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1681 std::string S = "(" + T.str() + ") {"; 1682 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1683 if (I != 0) 1684 S += ", "; 1685 S += A.second; 1686 } 1687 S += "}"; 1688 1689 return std::make_pair(T, S); 1690 } 1691 1692 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1693 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1694 std::pair<Type, std::string> B = 1695 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1696 assert_with_loc(B.first.isScalar(), 1697 "dup_typed() requires a scalar as the second argument"); 1698 Type T; 1699 // If the type argument is a constant string, construct the type directly. 1700 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1701 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1702 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1703 } else 1704 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1705 1706 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1707 std::string S = "(" + T.str() + ") {"; 1708 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1709 if (I != 0) 1710 S += ", "; 1711 S += B.second; 1712 } 1713 S += "}"; 1714 1715 return std::make_pair(T, S); 1716 } 1717 1718 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1719 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1720 std::pair<Type, std::string> A = 1721 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1722 std::pair<Type, std::string> B = 1723 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1724 1725 assert_with_loc(B.first.isScalar(), 1726 "splat() requires a scalar int as the second argument"); 1727 1728 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1729 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1730 S += ", " + B.second; 1731 } 1732 S += ")"; 1733 1734 return std::make_pair(Intr.getBaseType(), S); 1735 } 1736 1737 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1738 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1739 std::pair<Type, std::string> A = 1740 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1741 1742 assert_with_loc(!A.first.isVoid(), 1743 "Argument to save_temp() must have non-void type!"); 1744 1745 std::string N = std::string(DI->getArgNameStr(0)); 1746 assert_with_loc(!N.empty(), 1747 "save_temp() expects a name as the first argument"); 1748 1749 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1750 "Variable already defined!"); 1751 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1752 1753 std::string S = 1754 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1755 1756 return std::make_pair(Type::getVoid(), S); 1757 } 1758 1759 std::pair<Type, std::string> 1760 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1761 std::string S = Intr.Name; 1762 1763 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1764 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1765 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1766 1767 size_t Idx = S.find(ToReplace); 1768 1769 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1770 S.replace(Idx, ToReplace.size(), ReplaceWith); 1771 1772 return std::make_pair(Type::getVoid(), S); 1773 } 1774 1775 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1776 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1777 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1778 return std::make_pair(Type::fromTypedefName(Ty), Value); 1779 } 1780 1781 std::pair<Type, std::string> 1782 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1783 if (!ArgName.empty()) { 1784 assert_with_loc(!Arg->isComplete(), 1785 "Arguments must either be DAGs or names, not both!"); 1786 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1787 "Variable not defined!"); 1788 Variable &V = Intr.Variables[ArgName]; 1789 return std::make_pair(V.getType(), V.getName()); 1790 } 1791 1792 assert(Arg && "Neither ArgName nor Arg?!"); 1793 DagInit *DI = dyn_cast<DagInit>(Arg); 1794 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1795 1796 return emitDag(DI); 1797 } 1798 1799 std::string Intrinsic::generate() { 1800 // Avoid duplicated code for big and little endian 1801 if (isBigEndianSafe()) { 1802 generateImpl(false, "", ""); 1803 return OS.str(); 1804 } 1805 // Little endian intrinsics are simple and don't require any argument 1806 // swapping. 1807 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1808 1809 generateImpl(false, "", ""); 1810 1811 OS << "#else\n"; 1812 1813 // Big endian intrinsics are more complex. The user intended these 1814 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1815 // but we load as-if (V)LD1. So we should swap all arguments and 1816 // swap the return value too. 1817 // 1818 // If we call sub-intrinsics, we should call a version that does 1819 // not re-swap the arguments! 1820 generateImpl(true, "", "__noswap_"); 1821 1822 // If we're needed early, create a non-swapping variant for 1823 // big-endian. 1824 if (NeededEarly) { 1825 generateImpl(false, "__noswap_", "__noswap_"); 1826 } 1827 OS << "#endif\n\n"; 1828 1829 return OS.str(); 1830 } 1831 1832 void Intrinsic::generateImpl(bool ReverseArguments, 1833 StringRef NamePrefix, StringRef CallPrefix) { 1834 CurrentRecord = R; 1835 1836 // If we call a macro, our local variables may be corrupted due to 1837 // lack of proper lexical scoping. So, add a globally unique postfix 1838 // to every variable. 1839 // 1840 // indexBody() should have set up the Dependencies set by now. 1841 for (auto *I : Dependencies) 1842 if (I->UseMacro) { 1843 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1844 break; 1845 } 1846 1847 initVariables(); 1848 1849 emitPrototype(NamePrefix); 1850 1851 if (IsUnavailable) { 1852 OS << " __attribute__((unavailable));"; 1853 } else { 1854 emitOpeningBrace(); 1855 // Emit return variable declaration first as to not trigger 1856 // -Wdeclaration-after-statement. 1857 emitReturnVarDecl(); 1858 emitShadowedArgs(); 1859 if (ReverseArguments) 1860 emitArgumentReversal(); 1861 emitBody(CallPrefix); 1862 if (ReverseArguments) 1863 emitReturnReversal(); 1864 emitReturn(); 1865 emitClosingBrace(); 1866 } 1867 OS << "\n"; 1868 1869 CurrentRecord = nullptr; 1870 } 1871 1872 void Intrinsic::indexBody() { 1873 CurrentRecord = R; 1874 1875 initVariables(); 1876 // Emit return variable declaration first as to not trigger 1877 // -Wdeclaration-after-statement. 1878 emitReturnVarDecl(); 1879 emitBody(""); 1880 OS.str(""); 1881 1882 CurrentRecord = nullptr; 1883 } 1884 1885 //===----------------------------------------------------------------------===// 1886 // NeonEmitter implementation 1887 //===----------------------------------------------------------------------===// 1888 1889 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1890 Optional<std::string> MangledName) { 1891 // First, look up the name in the intrinsic map. 1892 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1893 ("Intrinsic '" + Name + "' not found!").str()); 1894 auto &V = IntrinsicMap.find(Name.str())->second; 1895 std::vector<Intrinsic *> GoodVec; 1896 1897 // Create a string to print if we end up failing. 1898 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1899 for (unsigned I = 0; I < Types.size(); ++I) { 1900 if (I != 0) 1901 ErrMsg += ", "; 1902 ErrMsg += Types[I].str(); 1903 } 1904 ErrMsg += ")'\n"; 1905 ErrMsg += "Available overloads:\n"; 1906 1907 // Now, look through each intrinsic implementation and see if the types are 1908 // compatible. 1909 for (auto &I : V) { 1910 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1911 ErrMsg += "("; 1912 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1913 if (A != 0) 1914 ErrMsg += ", "; 1915 ErrMsg += I.getParamType(A).str(); 1916 } 1917 ErrMsg += ")\n"; 1918 1919 if (MangledName && MangledName != I.getMangledName(true)) 1920 continue; 1921 1922 if (I.getNumParams() != Types.size()) 1923 continue; 1924 1925 unsigned ArgNum = 0; 1926 bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) { 1927 return Type == I.getParamType(ArgNum++); 1928 }); 1929 1930 if (MatchingArgumentTypes) 1931 GoodVec.push_back(&I); 1932 } 1933 1934 assert_with_loc(!GoodVec.empty(), 1935 "No compatible intrinsic found - " + ErrMsg); 1936 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1937 1938 return *GoodVec.front(); 1939 } 1940 1941 void NeonEmitter::createIntrinsic(Record *R, 1942 SmallVectorImpl<Intrinsic *> &Out) { 1943 std::string Name = std::string(R->getValueAsString("Name")); 1944 std::string Proto = std::string(R->getValueAsString("Prototype")); 1945 std::string Types = std::string(R->getValueAsString("Types")); 1946 Record *OperationRec = R->getValueAsDef("Operation"); 1947 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1948 std::string Guard = std::string(R->getValueAsString("ArchGuard")); 1949 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1950 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 1951 1952 // Set the global current record. This allows assert_with_loc to produce 1953 // decent location information even when highly nested. 1954 CurrentRecord = R; 1955 1956 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1957 1958 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1959 1960 ClassKind CK = ClassNone; 1961 if (R->getSuperClasses().size() >= 2) 1962 CK = ClassMap[R->getSuperClasses()[1].first]; 1963 1964 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1965 if (!CartesianProductWith.empty()) { 1966 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 1967 for (auto TS : TypeSpecs) { 1968 Type DefaultT(TS, "."); 1969 for (auto SrcTS : ProductTypeSpecs) { 1970 Type DefaultSrcT(SrcTS, "."); 1971 if (TS == SrcTS || 1972 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1973 continue; 1974 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1975 } 1976 } 1977 } else { 1978 for (auto TS : TypeSpecs) { 1979 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1980 } 1981 } 1982 1983 llvm::sort(NewTypeSpecs); 1984 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1985 NewTypeSpecs.end()); 1986 auto &Entry = IntrinsicMap[Name]; 1987 1988 for (auto &I : NewTypeSpecs) { 1989 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 1990 Guard, IsUnavailable, BigEndianSafe); 1991 Out.push_back(&Entry.back()); 1992 } 1993 1994 CurrentRecord = nullptr; 1995 } 1996 1997 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1998 /// declaration of builtins, checking for unique builtin declarations. 1999 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2000 SmallVectorImpl<Intrinsic *> &Defs) { 2001 OS << "#ifdef GET_NEON_BUILTINS\n"; 2002 2003 // We only want to emit a builtin once, and we want to emit them in 2004 // alphabetical order, so use a std::set. 2005 std::set<std::string> Builtins; 2006 2007 for (auto *Def : Defs) { 2008 if (Def->hasBody()) 2009 continue; 2010 2011 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 2012 2013 S += Def->getBuiltinTypeStr(); 2014 S += "\", \"n\")"; 2015 2016 Builtins.insert(S); 2017 } 2018 2019 for (auto &S : Builtins) 2020 OS << S << "\n"; 2021 OS << "#endif\n\n"; 2022 } 2023 2024 /// Generate the ARM and AArch64 overloaded type checking code for 2025 /// SemaChecking.cpp, checking for unique builtin declarations. 2026 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2027 SmallVectorImpl<Intrinsic *> &Defs) { 2028 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2029 2030 // We record each overload check line before emitting because subsequent Inst 2031 // definitions may extend the number of permitted types (i.e. augment the 2032 // Mask). Use std::map to avoid sorting the table by hash number. 2033 struct OverloadInfo { 2034 uint64_t Mask; 2035 int PtrArgNum; 2036 bool HasConstPtr; 2037 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 2038 }; 2039 std::map<std::string, OverloadInfo> OverloadMap; 2040 2041 for (auto *Def : Defs) { 2042 // If the def has a body (that is, it has Operation DAGs), it won't call 2043 // __builtin_neon_* so we don't need to generate a definition for it. 2044 if (Def->hasBody()) 2045 continue; 2046 // Functions which have a scalar argument cannot be overloaded, no need to 2047 // check them if we are emitting the type checking code. 2048 if (Def->protoHasScalar()) 2049 continue; 2050 2051 uint64_t Mask = 0ULL; 2052 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2053 2054 // Check if the function has a pointer or const pointer argument. 2055 int PtrArgNum = -1; 2056 bool HasConstPtr = false; 2057 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2058 const auto &Type = Def->getParamType(I); 2059 if (Type.isPointer()) { 2060 PtrArgNum = I; 2061 HasConstPtr = Type.isConstPointer(); 2062 } 2063 } 2064 2065 // For sret builtins, adjust the pointer argument index. 2066 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2067 PtrArgNum += 1; 2068 2069 std::string Name = Def->getName(); 2070 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2071 // and vst1_lane intrinsics. Using a pointer to the vector element 2072 // type with one of those operations causes codegen to select an aligned 2073 // load/store instruction. If you want an unaligned operation, 2074 // the pointer argument needs to have less alignment than element type, 2075 // so just accept any pointer type. 2076 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2077 PtrArgNum = -1; 2078 HasConstPtr = false; 2079 } 2080 2081 if (Mask) { 2082 std::string Name = Def->getMangledName(); 2083 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2084 OverloadInfo &OI = OverloadMap[Name]; 2085 OI.Mask |= Mask; 2086 OI.PtrArgNum |= PtrArgNum; 2087 OI.HasConstPtr = HasConstPtr; 2088 } 2089 } 2090 2091 for (auto &I : OverloadMap) { 2092 OverloadInfo &OI = I.second; 2093 2094 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2095 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2096 if (OI.PtrArgNum >= 0) 2097 OS << "; PtrArgNum = " << OI.PtrArgNum; 2098 if (OI.HasConstPtr) 2099 OS << "; HasConstPtr = true"; 2100 OS << "; break;\n"; 2101 } 2102 OS << "#endif\n\n"; 2103 } 2104 2105 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2106 SmallVectorImpl<Intrinsic *> &Defs) { 2107 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2108 2109 std::set<std::string> Emitted; 2110 2111 for (auto *Def : Defs) { 2112 if (Def->hasBody()) 2113 continue; 2114 // Functions which do not have an immediate do not need to have range 2115 // checking code emitted. 2116 if (!Def->hasImmediate()) 2117 continue; 2118 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2119 continue; 2120 2121 std::string LowerBound, UpperBound; 2122 2123 Record *R = Def->getRecord(); 2124 if (R->getValueAsBit("isVXAR")) { 2125 //VXAR takes an immediate in the range [0, 63] 2126 LowerBound = "0"; 2127 UpperBound = "63"; 2128 } else if (R->getValueAsBit("isVCVT_N")) { 2129 // VCVT between floating- and fixed-point values takes an immediate 2130 // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16. 2131 LowerBound = "1"; 2132 if (Def->getBaseType().getElementSizeInBits() == 16 || 2133 Def->getName().find('h') != std::string::npos) 2134 // VCVTh operating on FP16 intrinsics in range [1, 16) 2135 UpperBound = "15"; 2136 else if (Def->getBaseType().getElementSizeInBits() == 32) 2137 UpperBound = "31"; 2138 else 2139 UpperBound = "63"; 2140 } else if (R->getValueAsBit("isScalarShift")) { 2141 // Right shifts have an 'r' in the name, left shifts do not. Convert 2142 // instructions have the same bounds and right shifts. 2143 if (Def->getName().find('r') != std::string::npos || 2144 Def->getName().find("cvt") != std::string::npos) 2145 LowerBound = "1"; 2146 2147 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2148 } else if (R->getValueAsBit("isShift")) { 2149 // Builtins which are overloaded by type will need to have their upper 2150 // bound computed at Sema time based on the type constant. 2151 2152 // Right shifts have an 'r' in the name, left shifts do not. 2153 if (Def->getName().find('r') != std::string::npos) 2154 LowerBound = "1"; 2155 UpperBound = "RFT(TV, true)"; 2156 } else if (Def->getClassKind(true) == ClassB) { 2157 // ClassB intrinsics have a type (and hence lane number) that is only 2158 // known at runtime. 2159 if (R->getValueAsBit("isLaneQ")) 2160 UpperBound = "RFT(TV, false, true)"; 2161 else 2162 UpperBound = "RFT(TV, false, false)"; 2163 } else { 2164 // The immediate generally refers to a lane in the preceding argument. 2165 assert(Def->getImmediateIdx() > 0); 2166 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2167 UpperBound = utostr(T.getNumElements() - 1); 2168 } 2169 2170 // Calculate the index of the immediate that should be range checked. 2171 unsigned Idx = Def->getNumParams(); 2172 if (Def->hasImmediate()) 2173 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2174 2175 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2176 << "i = " << Idx << ";"; 2177 if (!LowerBound.empty()) 2178 OS << " l = " << LowerBound << ";"; 2179 if (!UpperBound.empty()) 2180 OS << " u = " << UpperBound << ";"; 2181 OS << " break;\n"; 2182 2183 Emitted.insert(Def->getMangledName()); 2184 } 2185 2186 OS << "#endif\n\n"; 2187 } 2188 2189 /// runHeader - Emit a file with sections defining: 2190 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2191 /// 2. the SemaChecking code for the type overload checking. 2192 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2193 void NeonEmitter::runHeader(raw_ostream &OS) { 2194 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2195 2196 SmallVector<Intrinsic *, 128> Defs; 2197 for (auto *R : RV) 2198 createIntrinsic(R, Defs); 2199 2200 // Generate shared BuiltinsXXX.def 2201 genBuiltinsDef(OS, Defs); 2202 2203 // Generate ARM overloaded type checking code for SemaChecking.cpp 2204 genOverloadTypeCheckCode(OS, Defs); 2205 2206 // Generate ARM range checking code for shift/lane immediates. 2207 genIntrinsicRangeCheckCode(OS, Defs); 2208 } 2209 2210 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2211 std::string TypedefTypes(types); 2212 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2213 2214 // Emit vector typedefs. 2215 bool InIfdef = false; 2216 for (auto &TS : TDTypeVec) { 2217 bool IsA64 = false; 2218 Type T(TS, "."); 2219 if (T.isDouble()) 2220 IsA64 = true; 2221 2222 if (InIfdef && !IsA64) { 2223 OS << "#endif\n"; 2224 InIfdef = false; 2225 } 2226 if (!InIfdef && IsA64) { 2227 OS << "#ifdef __aarch64__\n"; 2228 InIfdef = true; 2229 } 2230 2231 if (T.isPoly()) 2232 OS << "typedef __attribute__((neon_polyvector_type("; 2233 else 2234 OS << "typedef __attribute__((neon_vector_type("; 2235 2236 Type T2 = T; 2237 T2.makeScalar(); 2238 OS << T.getNumElements() << "))) "; 2239 OS << T2.str(); 2240 OS << " " << T.str() << ";\n"; 2241 } 2242 if (InIfdef) 2243 OS << "#endif\n"; 2244 OS << "\n"; 2245 2246 // Emit struct typedefs. 2247 InIfdef = false; 2248 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2249 for (auto &TS : TDTypeVec) { 2250 bool IsA64 = false; 2251 Type T(TS, "."); 2252 if (T.isDouble()) 2253 IsA64 = true; 2254 2255 if (InIfdef && !IsA64) { 2256 OS << "#endif\n"; 2257 InIfdef = false; 2258 } 2259 if (!InIfdef && IsA64) { 2260 OS << "#ifdef __aarch64__\n"; 2261 InIfdef = true; 2262 } 2263 2264 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2265 Type VT(TS, Mods); 2266 OS << "typedef struct " << VT.str() << " {\n"; 2267 OS << " " << T.str() << " val"; 2268 OS << "[" << NumMembers << "]"; 2269 OS << ";\n} "; 2270 OS << VT.str() << ";\n"; 2271 OS << "\n"; 2272 } 2273 } 2274 if (InIfdef) 2275 OS << "#endif\n"; 2276 } 2277 2278 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2279 /// is comprised of type definitions and function declarations. 2280 void NeonEmitter::run(raw_ostream &OS) { 2281 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2282 "------------------------------" 2283 "---===\n" 2284 " *\n" 2285 " * Permission is hereby granted, free of charge, to any person " 2286 "obtaining " 2287 "a copy\n" 2288 " * of this software and associated documentation files (the " 2289 "\"Software\")," 2290 " to deal\n" 2291 " * in the Software without restriction, including without limitation " 2292 "the " 2293 "rights\n" 2294 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2295 "and/or sell\n" 2296 " * copies of the Software, and to permit persons to whom the Software " 2297 "is\n" 2298 " * furnished to do so, subject to the following conditions:\n" 2299 " *\n" 2300 " * The above copyright notice and this permission notice shall be " 2301 "included in\n" 2302 " * all copies or substantial portions of the Software.\n" 2303 " *\n" 2304 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2305 "EXPRESS OR\n" 2306 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2307 "MERCHANTABILITY,\n" 2308 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2309 "SHALL THE\n" 2310 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2311 "OTHER\n" 2312 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2313 "ARISING FROM,\n" 2314 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2315 "DEALINGS IN\n" 2316 " * THE SOFTWARE.\n" 2317 " *\n" 2318 " *===-----------------------------------------------------------------" 2319 "---" 2320 "---===\n" 2321 " */\n\n"; 2322 2323 OS << "#ifndef __ARM_NEON_H\n"; 2324 OS << "#define __ARM_NEON_H\n\n"; 2325 2326 OS << "#ifndef __ARM_FP\n"; 2327 OS << "#error \"NEON intrinsics not available with the soft-float ABI. " 2328 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2329 OS << "#else\n\n"; 2330 2331 OS << "#if !defined(__ARM_NEON)\n"; 2332 OS << "#error \"NEON support not enabled\"\n"; 2333 OS << "#else\n\n"; 2334 2335 OS << "#include <stdint.h>\n\n"; 2336 2337 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2338 OS << "#include <arm_bf16.h>\n"; 2339 OS << "typedef __bf16 bfloat16_t;\n"; 2340 OS << "#endif\n\n"; 2341 2342 // Emit NEON-specific scalar typedefs. 2343 OS << "typedef float float32_t;\n"; 2344 OS << "typedef __fp16 float16_t;\n"; 2345 2346 OS << "#ifdef __aarch64__\n"; 2347 OS << "typedef double float64_t;\n"; 2348 OS << "#endif\n\n"; 2349 2350 // For now, signedness of polynomial types depends on target 2351 OS << "#ifdef __aarch64__\n"; 2352 OS << "typedef uint8_t poly8_t;\n"; 2353 OS << "typedef uint16_t poly16_t;\n"; 2354 OS << "typedef uint64_t poly64_t;\n"; 2355 OS << "typedef __uint128_t poly128_t;\n"; 2356 OS << "#else\n"; 2357 OS << "typedef int8_t poly8_t;\n"; 2358 OS << "typedef int16_t poly16_t;\n"; 2359 OS << "typedef int64_t poly64_t;\n"; 2360 OS << "#endif\n"; 2361 2362 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); 2363 2364 OS << "#ifdef __ARM_FEATURE_BF16\n"; 2365 emitNeonTypeDefs("bQb", OS); 2366 OS << "#endif\n\n"; 2367 2368 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2369 "__nodebug__))\n\n"; 2370 2371 SmallVector<Intrinsic *, 128> Defs; 2372 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2373 for (auto *R : RV) 2374 createIntrinsic(R, Defs); 2375 2376 for (auto *I : Defs) 2377 I->indexBody(); 2378 2379 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2380 2381 // Only emit a def when its requirements have been met. 2382 // FIXME: This loop could be made faster, but it's fast enough for now. 2383 bool MadeProgress = true; 2384 std::string InGuard; 2385 while (!Defs.empty() && MadeProgress) { 2386 MadeProgress = false; 2387 2388 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2389 I != Defs.end(); /*No step*/) { 2390 bool DependenciesSatisfied = true; 2391 for (auto *II : (*I)->getDependencies()) { 2392 if (llvm::is_contained(Defs, II)) 2393 DependenciesSatisfied = false; 2394 } 2395 if (!DependenciesSatisfied) { 2396 // Try the next one. 2397 ++I; 2398 continue; 2399 } 2400 2401 // Emit #endif/#if pair if needed. 2402 if ((*I)->getGuard() != InGuard) { 2403 if (!InGuard.empty()) 2404 OS << "#endif\n"; 2405 InGuard = (*I)->getGuard(); 2406 if (!InGuard.empty()) 2407 OS << "#if " << InGuard << "\n"; 2408 } 2409 2410 // Actually generate the intrinsic code. 2411 OS << (*I)->generate(); 2412 2413 MadeProgress = true; 2414 I = Defs.erase(I); 2415 } 2416 } 2417 assert(Defs.empty() && "Some requirements were not satisfied!"); 2418 if (!InGuard.empty()) 2419 OS << "#endif\n"; 2420 2421 OS << "\n"; 2422 OS << "#undef __ai\n\n"; 2423 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2424 OS << "#endif /* ifndef __ARM_FP */\n"; 2425 OS << "#endif /* __ARM_NEON_H */\n"; 2426 } 2427 2428 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2429 /// is comprised of type definitions and function declarations. 2430 void NeonEmitter::runFP16(raw_ostream &OS) { 2431 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2432 "------------------------------" 2433 "---===\n" 2434 " *\n" 2435 " * Permission is hereby granted, free of charge, to any person " 2436 "obtaining a copy\n" 2437 " * of this software and associated documentation files (the " 2438 "\"Software\"), to deal\n" 2439 " * in the Software without restriction, including without limitation " 2440 "the rights\n" 2441 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2442 "and/or sell\n" 2443 " * copies of the Software, and to permit persons to whom the Software " 2444 "is\n" 2445 " * furnished to do so, subject to the following conditions:\n" 2446 " *\n" 2447 " * The above copyright notice and this permission notice shall be " 2448 "included in\n" 2449 " * all copies or substantial portions of the Software.\n" 2450 " *\n" 2451 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2452 "EXPRESS OR\n" 2453 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2454 "MERCHANTABILITY,\n" 2455 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2456 "SHALL THE\n" 2457 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2458 "OTHER\n" 2459 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2460 "ARISING FROM,\n" 2461 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2462 "DEALINGS IN\n" 2463 " * THE SOFTWARE.\n" 2464 " *\n" 2465 " *===-----------------------------------------------------------------" 2466 "---" 2467 "---===\n" 2468 " */\n\n"; 2469 2470 OS << "#ifndef __ARM_FP16_H\n"; 2471 OS << "#define __ARM_FP16_H\n\n"; 2472 2473 OS << "#include <stdint.h>\n\n"; 2474 2475 OS << "typedef __fp16 float16_t;\n"; 2476 2477 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2478 "__nodebug__))\n\n"; 2479 2480 SmallVector<Intrinsic *, 128> Defs; 2481 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2482 for (auto *R : RV) 2483 createIntrinsic(R, Defs); 2484 2485 for (auto *I : Defs) 2486 I->indexBody(); 2487 2488 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2489 2490 // Only emit a def when its requirements have been met. 2491 // FIXME: This loop could be made faster, but it's fast enough for now. 2492 bool MadeProgress = true; 2493 std::string InGuard; 2494 while (!Defs.empty() && MadeProgress) { 2495 MadeProgress = false; 2496 2497 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2498 I != Defs.end(); /*No step*/) { 2499 bool DependenciesSatisfied = true; 2500 for (auto *II : (*I)->getDependencies()) { 2501 if (llvm::is_contained(Defs, II)) 2502 DependenciesSatisfied = false; 2503 } 2504 if (!DependenciesSatisfied) { 2505 // Try the next one. 2506 ++I; 2507 continue; 2508 } 2509 2510 // Emit #endif/#if pair if needed. 2511 if ((*I)->getGuard() != InGuard) { 2512 if (!InGuard.empty()) 2513 OS << "#endif\n"; 2514 InGuard = (*I)->getGuard(); 2515 if (!InGuard.empty()) 2516 OS << "#if " << InGuard << "\n"; 2517 } 2518 2519 // Actually generate the intrinsic code. 2520 OS << (*I)->generate(); 2521 2522 MadeProgress = true; 2523 I = Defs.erase(I); 2524 } 2525 } 2526 assert(Defs.empty() && "Some requirements were not satisfied!"); 2527 if (!InGuard.empty()) 2528 OS << "#endif\n"; 2529 2530 OS << "\n"; 2531 OS << "#undef __ai\n\n"; 2532 OS << "#endif /* __ARM_FP16_H */\n"; 2533 } 2534 2535 void NeonEmitter::runBF16(raw_ostream &OS) { 2536 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2537 "-----------------------------------===\n" 2538 " *\n" 2539 " *\n" 2540 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2541 "Exceptions.\n" 2542 " * See https://llvm.org/LICENSE.txt for license information.\n" 2543 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2544 " *\n" 2545 " *===-----------------------------------------------------------------" 2546 "------===\n" 2547 " */\n\n"; 2548 2549 OS << "#ifndef __ARM_BF16_H\n"; 2550 OS << "#define __ARM_BF16_H\n\n"; 2551 2552 OS << "typedef __bf16 bfloat16_t;\n"; 2553 2554 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2555 "__nodebug__))\n\n"; 2556 2557 SmallVector<Intrinsic *, 128> Defs; 2558 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2559 for (auto *R : RV) 2560 createIntrinsic(R, Defs); 2561 2562 for (auto *I : Defs) 2563 I->indexBody(); 2564 2565 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2566 2567 // Only emit a def when its requirements have been met. 2568 // FIXME: This loop could be made faster, but it's fast enough for now. 2569 bool MadeProgress = true; 2570 std::string InGuard; 2571 while (!Defs.empty() && MadeProgress) { 2572 MadeProgress = false; 2573 2574 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2575 I != Defs.end(); /*No step*/) { 2576 bool DependenciesSatisfied = true; 2577 for (auto *II : (*I)->getDependencies()) { 2578 if (llvm::is_contained(Defs, II)) 2579 DependenciesSatisfied = false; 2580 } 2581 if (!DependenciesSatisfied) { 2582 // Try the next one. 2583 ++I; 2584 continue; 2585 } 2586 2587 // Emit #endif/#if pair if needed. 2588 if ((*I)->getGuard() != InGuard) { 2589 if (!InGuard.empty()) 2590 OS << "#endif\n"; 2591 InGuard = (*I)->getGuard(); 2592 if (!InGuard.empty()) 2593 OS << "#if " << InGuard << "\n"; 2594 } 2595 2596 // Actually generate the intrinsic code. 2597 OS << (*I)->generate(); 2598 2599 MadeProgress = true; 2600 I = Defs.erase(I); 2601 } 2602 } 2603 assert(Defs.empty() && "Some requirements were not satisfied!"); 2604 if (!InGuard.empty()) 2605 OS << "#endif\n"; 2606 2607 OS << "\n"; 2608 OS << "#undef __ai\n\n"; 2609 2610 OS << "#endif\n"; 2611 } 2612 2613 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2614 NeonEmitter(Records).run(OS); 2615 } 2616 2617 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2618 NeonEmitter(Records).runFP16(OS); 2619 } 2620 2621 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { 2622 NeonEmitter(Records).runBF16(OS); 2623 } 2624 2625 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2626 NeonEmitter(Records).runHeader(OS); 2627 } 2628 2629 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2630 llvm_unreachable("Neon test generation no longer implemented!"); 2631 } 2632