xref: /freebsd/contrib/llvm-project/clang/utils/TableGen/NeonEmitter.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This tablegen backend is responsible for emitting arm_neon.h, which includes
10 // a declaration and definition of each function specified by the ARM NEON
11 // compiler interface.  See ARM document DUI0348B.
12 //
13 // Each NEON instruction is implemented in terms of 1 or more functions which
14 // are suffixed with the element type of the input vectors.  Functions may be
15 // implemented in terms of generic vector operations such as +, *, -, etc. or
16 // by calling a __builtin_-prefixed function which will be handled by clang's
17 // CodeGen library.
18 //
19 // Additional validation code can be generated by this file when runHeader() is
20 // called, rather than the normal run() entry point.
21 //
22 // See also the documentation in include/clang/Basic/arm_neon.td.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "TableGenBackends.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/TableGen/AArch64ImmCheck.h"
37 #include "llvm/TableGen/Error.h"
38 #include "llvm/TableGen/Record.h"
39 #include "llvm/TableGen/SetTheory.h"
40 #include "llvm/TableGen/StringToOffsetTable.h"
41 #include <algorithm>
42 #include <cassert>
43 #include <cctype>
44 #include <cstddef>
45 #include <cstdint>
46 #include <deque>
47 #include <map>
48 #include <optional>
49 #include <set>
50 #include <sstream>
51 #include <string>
52 #include <unordered_map>
53 #include <utility>
54 #include <vector>
55 
56 using namespace llvm;
57 
58 namespace {
59 
60 // While globals are generally bad, this one allows us to perform assertions
61 // liberally and somehow still trace them back to the def they indirectly
62 // came from.
63 static const Record *CurrentRecord = nullptr;
assert_with_loc(bool Assertion,const std::string & Str)64 static void assert_with_loc(bool Assertion, const std::string &Str) {
65   if (!Assertion) {
66     if (CurrentRecord)
67       PrintFatalError(CurrentRecord->getLoc(), Str);
68     else
69       PrintFatalError(Str);
70   }
71 }
72 
/// The instruction class an intrinsic definition belongs to. The
/// per-enumerator comments describe the name suffix each class uses.
/// NeonEmitter's constructor maps the TableGen classes (SInst, IInst, WInst,
/// VInst, SOpInst, IOpInst, WOpInst, LOpInst, NoTestOpInst) onto these values.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassV,     // void-suffix instruction, no suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};
86 
87 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
88 /// builtins.  These must be kept in sync with the flags in
89 /// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {

// Bit layout of an encoded type value: EltTypeMask covers the low four bits,
// and the two flags are single bits above it. Type::getNeonEnum() sets
// UnsignedFlag for unsigned integer types and QuadFlag for 128-bit types.
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };

// Element type codes. The enumerator order is significant:
// Type::getNeonEnum() derives values by adding a width-based offset to Int8,
// Poly8 and Float16, so each of those groups must stay contiguous and sorted
// by increasing element width.
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64,
  BFloat16,
  MFloat8
};

} // end namespace NeonTypeFlags
111 
112 class NeonEmitter;
113 
114 //===----------------------------------------------------------------------===//
115 // TypeSpec
116 //===----------------------------------------------------------------------===//
117 
118 /// A TypeSpec is just a simple wrapper around a string, but gets its own type
119 /// for strong typing purposes.
120 ///
121 /// A TypeSpec can be used to create a type.
122 class TypeSpec : public std::string {
123 public:
fromTypeSpecs(StringRef Str)124   static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
125     std::vector<TypeSpec> Ret;
126     TypeSpec Acc;
127     for (char I : Str.str()) {
128       if (islower(I)) {
129         Acc.push_back(I);
130         Ret.push_back(TypeSpec(Acc));
131         Acc.clear();
132       } else {
133         Acc.push_back(I);
134       }
135     }
136     return Ret;
137   }
138 };
139 
140 //===----------------------------------------------------------------------===//
141 // Type
142 //===----------------------------------------------------------------------===//
143 
144 /// A Type. Not much more to say here.
145 class Type {
146 private:
147   TypeSpec TS;
148 
149   enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
150   TypeKind Kind;
151   bool Immediate, Constant, Pointer;
152   // ScalarForMangling and NoManglingQ are really not suited to live here as
153   // they are not related to the type. But they live in the TypeSpec (not the
154   // prototype), so this is really the only place to store them.
155   bool ScalarForMangling, NoManglingQ;
156   unsigned Bitwidth, ElementBitwidth, NumVectors;
157 
158 public:
Type()159   Type()
160       : Kind(Void), Immediate(false), Constant(false),
161         Pointer(false), ScalarForMangling(false), NoManglingQ(false),
162         Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
163 
Type(TypeSpec TS,StringRef CharMods)164   Type(TypeSpec TS, StringRef CharMods)
165       : TS(std::move(TS)), Kind(Void), Immediate(false),
166         Constant(false), Pointer(false), ScalarForMangling(false),
167         NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
168     applyModifiers(CharMods);
169   }
170 
171   /// Returns a type representing "void".
getVoid()172   static Type getVoid() { return Type(); }
173 
operator ==(const Type & Other) const174   bool operator==(const Type &Other) const { return str() == Other.str(); }
operator !=(const Type & Other) const175   bool operator!=(const Type &Other) const { return !operator==(Other); }
176 
177   //
178   // Query functions
179   //
isScalarForMangling() const180   bool isScalarForMangling() const { return ScalarForMangling; }
noManglingQ() const181   bool noManglingQ() const { return NoManglingQ; }
182 
isPointer() const183   bool isPointer() const { return Pointer; }
isValue() const184   bool isValue() const { return !isVoid() && !isPointer(); }
isScalar() const185   bool isScalar() const { return isValue() && NumVectors == 0; }
isVector() const186   bool isVector() const { return isValue() && NumVectors > 0; }
isConstPointer() const187   bool isConstPointer() const { return Constant; }
isFloating() const188   bool isFloating() const { return Kind == Float; }
isInteger() const189   bool isInteger() const { return Kind == SInt || Kind == UInt; }
isPoly() const190   bool isPoly() const { return Kind == Poly; }
isSigned() const191   bool isSigned() const { return Kind == SInt; }
isImmediate() const192   bool isImmediate() const { return Immediate; }
isFloat() const193   bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
isDouble() const194   bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
isHalf() const195   bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
isChar() const196   bool isChar() const { return ElementBitwidth == 8; }
isShort() const197   bool isShort() const { return isInteger() && ElementBitwidth == 16; }
isInt() const198   bool isInt() const { return isInteger() && ElementBitwidth == 32; }
isLong() const199   bool isLong() const { return isInteger() && ElementBitwidth == 64; }
isVoid() const200   bool isVoid() const { return Kind == Void; }
isBFloat16() const201   bool isBFloat16() const { return Kind == BFloat16; }
isMFloat8() const202   bool isMFloat8() const { return Kind == MFloat8; }
isFPM() const203   bool isFPM() const { return Kind == FPM; }
getNumElements() const204   unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
getSizeInBits() const205   unsigned getSizeInBits() const { return Bitwidth; }
getElementSizeInBits() const206   unsigned getElementSizeInBits() const { return ElementBitwidth; }
getNumVectors() const207   unsigned getNumVectors() const { return NumVectors; }
208 
209   //
210   // Mutator functions
211   //
makeUnsigned()212   void makeUnsigned() {
213     assert(!isVoid() && "not a potentially signed type");
214     Kind = UInt;
215   }
makeSigned()216   void makeSigned() {
217     assert(!isVoid() && "not a potentially signed type");
218     Kind = SInt;
219   }
220 
makeInteger(unsigned ElemWidth,bool Sign)221   void makeInteger(unsigned ElemWidth, bool Sign) {
222     assert(!isVoid() && "converting void to int probably not useful");
223     Kind = Sign ? SInt : UInt;
224     Immediate = false;
225     ElementBitwidth = ElemWidth;
226   }
227 
makeImmediate(unsigned ElemWidth)228   void makeImmediate(unsigned ElemWidth) {
229     Kind = SInt;
230     Immediate = true;
231     ElementBitwidth = ElemWidth;
232   }
233 
makeScalar()234   void makeScalar() {
235     Bitwidth = ElementBitwidth;
236     NumVectors = 0;
237   }
238 
makeOneVector()239   void makeOneVector() {
240     assert(isVector());
241     NumVectors = 1;
242   }
243 
make32BitElement()244   void make32BitElement() {
245     assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
246     ElementBitwidth = 32;
247   }
248 
doubleLanes()249   void doubleLanes() {
250     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
251     Bitwidth = 128;
252   }
253 
halveLanes()254   void halveLanes() {
255     assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
256     Bitwidth = 64;
257   }
258 
259   /// Return the C string representation of a type, which is the typename
260   /// defined in stdint.h or arm_neon.h.
261   std::string str() const;
262 
263   /// Return the string representation of a type, which is an encoded
264   /// string for passing to the BUILTIN() macro in Builtins.def.
265   std::string builtin_str() const;
266 
267   /// Return the value in NeonTypeFlags for this type.
268   unsigned getNeonEnum() const;
269 
270   /// Parse a type from a stdint.h or arm_neon.h typedef name,
271   /// for example uint32x2_t or int64_t.
272   static Type fromTypedefName(StringRef Name);
273 
274 private:
275   /// Creates the type based on the typespec string in TS.
276   /// Sets "Quad" to true if the "Q" or "H" modifiers were
277   /// seen. This is needed by applyModifier as some modifiers
278   /// only take effect if the type size was changed by "Q" or "H".
279   void applyTypespec(bool &Quad);
280   /// Applies prototype modifiers to the type.
281   void applyModifiers(StringRef Mods);
282 };
283 
284 //===----------------------------------------------------------------------===//
285 // Variable
286 //===----------------------------------------------------------------------===//
287 
288 /// A variable is a simple class that just has a type and a name.
289 class Variable {
290   Type T;
291   std::string N;
292 
293 public:
Variable()294   Variable() : T(Type::getVoid()) {}
Variable(Type T,std::string N)295   Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
296 
getType() const297   Type getType() const { return T; }
getName() const298   std::string getName() const { return "__" + N; }
299 };
300 
301 //===----------------------------------------------------------------------===//
302 // Intrinsic
303 //===----------------------------------------------------------------------===//
304 
305 /// The main grunt class. This represents an instantiation of an intrinsic with
306 /// a particular typespec and prototype.
307 class Intrinsic {
308   /// The Record this intrinsic was created from.
309   const Record *R;
310   /// The unmangled name.
311   std::string Name;
312   /// The input and output typespecs. InTS == OutTS except when
313   /// CartesianProductWith is non-empty - this is the case for vreinterpret.
314   TypeSpec OutTS, InTS;
315   /// The base class kind. Most intrinsics use ClassS, which has full type
316   /// info for integers (s32/u32). Some use ClassI, which doesn't care about
317   /// signedness (i32), while some (ClassB) have no type at all, only a width
318   /// (32).
319   ClassKind CK;
320   /// The list of DAGs for the body. May be empty, in which case we should
321   /// emit a builtin call.
322   const ListInit *Body;
323   /// The architectural ifdef guard.
324   std::string ArchGuard;
325   /// The architectural target() guard.
326   std::string TargetGuard;
327   /// Set if the Unavailable bit is 1. This means we don't generate a body,
328   /// just an "unavailable" attribute on a declaration.
329   bool IsUnavailable;
330   /// Is this intrinsic safe for big-endian? or does it need its arguments
331   /// reversing?
332   bool BigEndianSafe;
333 
334   /// The types of return value [0] and parameters [1..].
335   std::vector<Type> Types;
336 
337   SmallVector<ImmCheck, 2> ImmChecks;
338   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
339   int PolymorphicKeyType;
340   /// The local variables defined.
341   std::map<std::string, Variable, std::less<>> Variables;
342   /// NeededEarly - set if any other intrinsic depends on this intrinsic.
343   bool NeededEarly;
344   /// UseMacro - set if we should implement using a macro or unset for a
345   ///            function.
346   bool UseMacro;
347   /// The set of intrinsics that this intrinsic uses/requires.
348   std::set<Intrinsic *> Dependencies;
349   /// The "base type", which is Type('d', OutTS). InBaseType is only
350   /// different if CartesianProductWith is non-empty (for vreinterpret).
351   Type BaseType, InBaseType;
352   /// The return variable.
353   Variable RetVar;
354   /// A postfix to apply to every variable. Defaults to "".
355   std::string VariablePostfix;
356 
357   NeonEmitter &Emitter;
358   std::stringstream OS;
359 
isBigEndianSafe() const360   bool isBigEndianSafe() const {
361     if (BigEndianSafe)
362       return true;
363 
364     for (const auto &T : Types){
365       if (T.isVector() && T.getNumElements() > 1)
366         return false;
367     }
368     return true;
369   }
370 
371 public:
Intrinsic(const Record * R,StringRef Name,StringRef Proto,TypeSpec OutTS,TypeSpec InTS,ClassKind CK,const ListInit * Body,NeonEmitter & Emitter,StringRef ArchGuard,StringRef TargetGuard,bool IsUnavailable,bool BigEndianSafe)372   Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
373             TypeSpec InTS, ClassKind CK, const ListInit *Body,
374             NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
375             bool IsUnavailable, bool BigEndianSafe)
376       : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
377         ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
378         IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
379         PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
380         BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
381     // Modify the TypeSpec per-argument to get a concrete Type, and create
382     // known variables for each.
383     // Types[0] is the return value.
384     unsigned Pos = 0;
385     Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
386     StringRef Mods = getNextModifiers(Proto, Pos);
387     while (!Mods.empty()) {
388       Types.emplace_back(InTS, Mods);
389       if (Mods.contains('!'))
390         PolymorphicKeyType = Types.size() - 1;
391 
392       Mods = getNextModifiers(Proto, Pos);
393     }
394 
395     for (const auto &Type : Types) {
396       // If this builtin takes an immediate argument, we need to #define it rather
397       // than use a standard declaration, so that SemaChecking can range check
398       // the immediate passed by the user.
399 
400       // Pointer arguments need to use macros to avoid hiding aligned attributes
401       // from the pointer type.
402 
403       // It is not permitted to pass or return an __fp16 by value, so intrinsics
404       // taking a scalar float16_t must be implemented as macros.
405       if (Type.isImmediate() || Type.isPointer() ||
406           (Type.isScalar() && Type.isHalf()))
407         UseMacro = true;
408     }
409 
410     int ArgIdx, Kind, TypeArgIdx;
411     for (const Record *I : R->getValueAsListOfDefs("ImmChecks")) {
412       unsigned EltSizeInBits = 0, VecSizeInBits = 0;
413 
414       ArgIdx = I->getValueAsInt("ImmArgIdx");
415       TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
416       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
417 
418       assert((ArgIdx >= 0 && Kind >= 0) &&
419              "ImmArgIdx and Kind must be nonnegative");
420 
421       if (TypeArgIdx >= 0) {
422         Type ContextType = getParamType(TypeArgIdx);
423 
424         // Element size cannot be set for intrinscs that map to polymorphic
425         // builtins.
426         if (CK != ClassB)
427           EltSizeInBits = ContextType.getElementSizeInBits();
428 
429         VecSizeInBits = ContextType.getSizeInBits();
430       }
431 
432       ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits);
433     }
434     sort(ImmChecks.begin(), ImmChecks.end(),
435          [](const ImmCheck &a, const ImmCheck &b) {
436            return a.getImmArgIdx() < b.getImmArgIdx();
437          }); // Sort for comparison with other intrinsics which map to the
438              // same builtin
439   }
440 
441   /// Get the Record that this intrinsic is based off.
getRecord() const442   const Record *getRecord() const { return R; }
443   /// Get the set of Intrinsics that this intrinsic calls.
444   /// this is the set of immediate dependencies, NOT the
445   /// transitive closure.
getDependencies() const446   const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
447   /// Get the architectural guard string (#ifdef).
getArchGuard() const448   std::string getArchGuard() const { return ArchGuard; }
getTargetGuard() const449   std::string getTargetGuard() const { return TargetGuard; }
getImmChecks() const450   ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
451   /// Get the non-mangled name.
getName() const452   std::string getName() const { return Name; }
453 
454   /// Return true if the intrinsic takes an immediate operand.
hasImmediate() const455   bool hasImmediate() const {
456     return any_of(Types, [](const Type &T) { return T.isImmediate(); });
457   }
458 
459   // Return if the supplied argument is an immediate
isArgImmediate(unsigned idx) const460   bool isArgImmediate(unsigned idx) const {
461     return Types[idx + 1].isImmediate();
462   }
463 
getNumParams() const464   unsigned getNumParams() const { return Types.size() - 1; }
getReturnType() const465   Type getReturnType() const { return Types[0]; }
getParamType(unsigned I) const466   Type getParamType(unsigned I) const { return Types[I + 1]; }
getBaseType() const467   Type getBaseType() const { return BaseType; }
getPolymorphicKeyType() const468   Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
469 
470   /// Return true if the prototype has a scalar argument.
471   bool protoHasScalar() const;
472 
473   /// Return the index that parameter PIndex will sit at
474   /// in a generated function call. This is often just PIndex,
475   /// but may not be as things such as multiple-vector operands
476   /// and sret parameters need to be taken into account.
getGeneratedParamIdx(unsigned PIndex)477   unsigned getGeneratedParamIdx(unsigned PIndex) {
478     unsigned Idx = 0;
479     if (getReturnType().getNumVectors() > 1)
480       // Multiple vectors are passed as sret.
481       ++Idx;
482 
483     for (unsigned I = 0; I < PIndex; ++I)
484       Idx += std::max(1U, getParamType(I).getNumVectors());
485 
486     return Idx;
487   }
488 
hasBody() const489   bool hasBody() const { return Body && !Body->empty(); }
490 
setNeededEarly()491   void setNeededEarly() { NeededEarly = true; }
492 
operator <(const Intrinsic & Other) const493   bool operator<(const Intrinsic &Other) const {
494     // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
495     return std::tie(ArchGuard, TargetGuard, Name) <
496            std::tie(Other.ArchGuard, Other.TargetGuard, Other.Name);
497   }
498 
getClassKind(bool UseClassBIfScalar=false)499   ClassKind getClassKind(bool UseClassBIfScalar = false) {
500     if (UseClassBIfScalar && !protoHasScalar())
501       return ClassB;
502     return CK;
503   }
504 
505   /// Return the name, mangled with type information.
506   /// If ForceClassS is true, use ClassS (u32/s32) instead
507   /// of the intrinsic's own type class.
508   std::string getMangledName(bool ForceClassS = false) const;
509   /// Return the type code for a builtin function call.
510   std::string getInstTypeCode(Type T, ClassKind CK) const;
511   /// Return the type string for a BUILTIN() macro in Builtins.def.
512   std::string getBuiltinTypeStr();
513 
514   /// Generate the intrinsic, returning code.
515   std::string generate();
516   /// Perform type checking and populate the dependency graph, but
517   /// don't generate code yet.
518   void indexBody();
519 
520 private:
521   StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
522 
523   std::string mangleName(std::string Name, ClassKind CK) const;
524 
525   void initVariables();
526   std::string replaceParamsIn(std::string S);
527 
528   void emitBodyAsBuiltinCall();
529 
530   void generateImpl(bool ReverseArguments,
531                     StringRef NamePrefix, StringRef CallPrefix);
532   void emitReturn();
533   void emitBody(StringRef CallPrefix);
534   void emitShadowedArgs();
535   void emitArgumentReversal();
536   void emitReturnVarDecl();
537   void emitReturnReversal();
538   void emitReverseVariable(Variable &Dest, Variable &Src);
539   void emitNewLine();
540   void emitClosingBrace();
541   void emitOpeningBrace();
542   void emitPrototype(StringRef NamePrefix);
543 
544   class DagEmitter {
545     Intrinsic &Intr;
546     StringRef CallPrefix;
547 
548   public:
DagEmitter(Intrinsic & Intr,StringRef CallPrefix)549     DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
550       Intr(Intr), CallPrefix(CallPrefix) {
551     }
552     std::pair<Type, std::string> emitDagArg(const Init *Arg,
553                                             std::string ArgName);
554     std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
555     std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
556     std::pair<Type, std::string> emitDagDup(const DagInit *DI);
557     std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
558     std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
559     std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast);
560     std::pair<Type, std::string> emitDagCall(const DagInit *DI,
561                                              bool MatchMangledName);
562     std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
563     std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
564     std::pair<Type, std::string> emitDagOp(const DagInit *DI);
565     std::pair<Type, std::string> emitDag(const DagInit *DI);
566   };
567 };
568 
569 //===----------------------------------------------------------------------===//
570 // NeonEmitter
571 //===----------------------------------------------------------------------===//
572 
573 class NeonEmitter {
574   const RecordKeeper &Records;
575   DenseMap<const Record *, ClassKind> ClassMap;
576   std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
577   unsigned UniqueNumber;
578 
579   void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out);
580   void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
581   void genStreamingSVECompatibleList(raw_ostream &OS,
582                                      SmallVectorImpl<Intrinsic *> &Defs);
583   void genOverloadTypeCheckCode(raw_ostream &OS,
584                                 SmallVectorImpl<Intrinsic *> &Defs);
585   bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
586                                 const ArrayRef<ImmCheck> ChecksB);
587   void genIntrinsicRangeCheckCode(raw_ostream &OS,
588                                   SmallVectorImpl<Intrinsic *> &Defs);
589 
590 public:
591   /// Called by Intrinsic - this attempts to get an intrinsic that takes
592   /// the given types as arguments.
593   Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
594                           std::optional<std::string> MangledName);
595 
596   /// Called by Intrinsic - returns a globally-unique number.
getUniqueNumber()597   unsigned getUniqueNumber() { return UniqueNumber++; }
598 
NeonEmitter(const RecordKeeper & R)599   NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) {
600     const Record *SI = R.getClass("SInst");
601     const Record *II = R.getClass("IInst");
602     const Record *WI = R.getClass("WInst");
603     const Record *VI = R.getClass("VInst");
604     const Record *SOpI = R.getClass("SOpInst");
605     const Record *IOpI = R.getClass("IOpInst");
606     const Record *WOpI = R.getClass("WOpInst");
607     const Record *LOpI = R.getClass("LOpInst");
608     const Record *NoTestOpI = R.getClass("NoTestOpInst");
609 
610     ClassMap[SI] = ClassS;
611     ClassMap[II] = ClassI;
612     ClassMap[WI] = ClassW;
613     ClassMap[VI] = ClassV;
614     ClassMap[SOpI] = ClassS;
615     ClassMap[IOpI] = ClassI;
616     ClassMap[WOpI] = ClassW;
617     ClassMap[LOpI] = ClassL;
618     ClassMap[NoTestOpI] = ClassNoTest;
619   }
620 
621   // Emit arm_neon.h.inc
622   void run(raw_ostream &o);
623 
624   // Emit arm_fp16.h.inc
625   void runFP16(raw_ostream &o);
626 
627   // Emit arm_bf16.h.inc
628   void runBF16(raw_ostream &o);
629 
630   void runVectorTypes(raw_ostream &o);
631 
632   // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
633   // arm_bf16.h
634   void runHeader(raw_ostream &o);
635 };
636 
637 } // end anonymous namespace
638 
639 //===----------------------------------------------------------------------===//
640 // Type implementation
641 //===----------------------------------------------------------------------===//
642 
str() const643 std::string Type::str() const {
644   if (isVoid())
645     return "void";
646   if (isFPM())
647     return "fpm_t";
648 
649   std::string S;
650 
651   if (isInteger() && !isSigned())
652     S += "u";
653 
654   if (isPoly())
655     S += "poly";
656   else if (isFloating())
657     S += "float";
658   else if (isBFloat16())
659     S += "bfloat";
660   else if (isMFloat8())
661     S += "mfloat";
662   else
663     S += "int";
664 
665   S += utostr(ElementBitwidth);
666   if (isVector())
667     S += "x" + utostr(getNumElements());
668   if (NumVectors > 1)
669     S += "x" + utostr(NumVectors);
670   S += "_t";
671 
672   if (Constant)
673     S += " const";
674   if (Pointer)
675     S += " *";
676 
677   return S;
678 }
679 
builtin_str() const680 std::string Type::builtin_str() const {
681   std::string S;
682   if (isVoid())
683     return "v";
684 
685   if (isPointer()) {
686     // All pointers are void pointers.
687     S = "v";
688     if (isConstPointer())
689       S += "C";
690     S += "*";
691     return S;
692   } else if (isInteger())
693     switch (ElementBitwidth) {
694     case 8: S += "c"; break;
695     case 16: S += "s"; break;
696     case 32: S += "i"; break;
697     case 64: S += "Wi"; break;
698     case 128: S += "LLLi"; break;
699     default: llvm_unreachable("Unhandled case!");
700     }
701   else if (isBFloat16()) {
702     assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
703     S += "y";
704   } else if (isMFloat8()) {
705     assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
706     S += "m";
707   } else if (isFPM()) {
708     S += "UWi";
709   } else
710     switch (ElementBitwidth) {
711     case 16: S += "h"; break;
712     case 32: S += "f"; break;
713     case 64: S += "d"; break;
714     default: llvm_unreachable("Unhandled case!");
715     }
716 
717   // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
718   if (isChar() && !isPointer() && isSigned())
719     // Make chars explicitly signed.
720     S = "S" + S;
721   else if (isInteger() && !isSigned())
722     S = "U" + S;
723 
724   // Constant indices are "int", but have the "constant expression" modifier.
725   if (isImmediate()) {
726     assert(isInteger() && isSigned());
727     S = "I" + S;
728   }
729 
730   if (isScalar())
731     return S;
732 
733   std::string Ret;
734   for (unsigned I = 0; I < NumVectors; ++I)
735     Ret += "V" + utostr(getNumElements()) + S;
736 
737   return Ret;
738 }
739 
getNeonEnum() const740 unsigned Type::getNeonEnum() const {
741   unsigned Addend;
742   switch (ElementBitwidth) {
743   case 8: Addend = 0; break;
744   case 16: Addend = 1; break;
745   case 32: Addend = 2; break;
746   case 64: Addend = 3; break;
747   case 128: Addend = 4; break;
748   default: llvm_unreachable("Unhandled element bitwidth!");
749   }
750 
751   unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
752   if (isPoly()) {
753     // Adjustment needed because Poly32 doesn't exist.
754     if (Addend >= 2)
755       --Addend;
756     Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
757   }
758   if (isFloating()) {
759     assert(Addend != 0 && "Float8 doesn't exist!");
760     Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
761   }
762 
763   if (isBFloat16()) {
764     assert(Addend == 1 && "BFloat16 is only 16 bit");
765     Base = (unsigned)NeonTypeFlags::BFloat16;
766   }
767 
768   if (isMFloat8()) {
769     Base = (unsigned)NeonTypeFlags::MFloat8;
770   }
771 
772   if (Bitwidth == 128)
773     Base |= (unsigned)NeonTypeFlags::QuadFlag;
774   if (isInteger() && !isSigned())
775     Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
776 
777   return Base;
778 }
779 
fromTypedefName(StringRef Name)780 Type Type::fromTypedefName(StringRef Name) {
781   Type T;
782   T.Kind = SInt;
783 
784   if (Name.consume_front("u"))
785     T.Kind = UInt;
786 
787   if (Name.consume_front("float")) {
788     T.Kind = Float;
789   } else if (Name.consume_front("poly")) {
790     T.Kind = Poly;
791   } else if (Name.consume_front("bfloat")) {
792     T.Kind = BFloat16;
793   } else if (Name.consume_front("mfloat")) {
794     T.Kind = MFloat8;
795   } else {
796     assert(Name.starts_with("int"));
797     Name = Name.drop_front(3);
798   }
799 
800   unsigned I = 0;
801   for (I = 0; I < Name.size(); ++I) {
802     if (!isdigit(Name[I]))
803       break;
804   }
805   Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
806   Name = Name.drop_front(I);
807 
808   T.Bitwidth = T.ElementBitwidth;
809   T.NumVectors = 1;
810 
811   if (Name.consume_front("x")) {
812     unsigned I = 0;
813     for (I = 0; I < Name.size(); ++I) {
814       if (!isdigit(Name[I]))
815         break;
816     }
817     unsigned NumLanes;
818     Name.substr(0, I).getAsInteger(10, NumLanes);
819     Name = Name.drop_front(I);
820     T.Bitwidth = T.ElementBitwidth * NumLanes;
821   } else {
822     // Was scalar.
823     T.NumVectors = 0;
824   }
825   if (Name.consume_front("x")) {
826     unsigned I = 0;
827     for (I = 0; I < Name.size(); ++I) {
828       if (!isdigit(Name[I]))
829         break;
830     }
831     Name.substr(0, I).getAsInteger(10, T.NumVectors);
832     Name = Name.drop_front(I);
833   }
834 
835   assert(Name.starts_with("_t") && "Malformed typedef!");
836   return T;
837 }
838 
/// Reset this type to its defaults and then decode the typespec string TS,
/// one character at a time.
///
/// \param Quad set to true when the typespec contains 'Q' or 'H'; it is never
///             cleared here, so the caller must initialise it. Its final value
///             selects the overall width (128 bits when set, 64 otherwise).
void Type::applyTypespec(bool &Quad) {
  std::string S = TS;
  ScalarForMangling = false;
  Kind = SInt;
  // Sentinel: some character below must provide an element width.
  ElementBitwidth = ~0U;
  NumVectors = 1;

  for (char I : S) {
    switch (I) {
    case 'S':
      // Scalar, as far as name mangling is concerned.
      ScalarForMangling = true;
      break;
    case 'H':
      // Quad width, but flagged so that no 'q' is mangled into the name.
      NoManglingQ = true;
      Quad = true;
      break;
    case 'Q':
      Quad = true;
      break;
    case 'P':
      Kind = Poly;
      break;
    case 'U':
      Kind = UInt;
      break;
    case 'c':
      ElementBitwidth = 8;
      break;
    case 'h':
      // 16-bit float: set the kind, then share the 16-bit width with 's'.
      Kind = Float;
      [[fallthrough]];
    case 's':
      ElementBitwidth = 16;
      break;
    case 'f':
      // 32-bit float: set the kind, then share the 32-bit width with 'i'.
      Kind = Float;
      [[fallthrough]];
    case 'i':
      ElementBitwidth = 32;
      break;
    case 'd':
      // 64-bit float: set the kind, then share the 64-bit width with 'l'.
      Kind = Float;
      [[fallthrough]];
    case 'l':
      ElementBitwidth = 64;
      break;
    case 'k':
      ElementBitwidth = 128;
      // Poly doesn't have a 128x1 type.
      if (isPoly())
        NumVectors = 0;
      break;
    case 'b':
      Kind = BFloat16;
      ElementBitwidth = 16;
      break;
    case 'm':
      Kind = MFloat8;
      ElementBitwidth = 8;
      break;
    default:
      llvm_unreachable("Unhandled type code!");
    }
  }
  assert(ElementBitwidth != ~0U && "Bad element bitwidth!");

  Bitwidth = Quad ? 128 : 64;
}
907 
/// Derive this type from the typespec (via applyTypespec) and then apply the
/// prototype modifier characters in \p Mods, left to right. Later modifiers
/// see the effect of earlier ones, so their order matters.
void Type::applyModifiers(StringRef Mods) {
  bool AppliedQuad = false;
  applyTypespec(AppliedQuad);

  for (char Mod : Mods) {
    switch (Mod) {
    case '.':
      // No change.
      break;
    case 'v':
      Kind = Void;
      break;
    case 'S':
      Kind = SInt;
      break;
    case 'U':
      Kind = UInt;
      break;
    case 'B':
      Kind = BFloat16;
      ElementBitwidth = 16;
      break;
    case 'F':
      Kind = Float;
      break;
    case 'P':
      Kind = Poly;
      break;
    case 'V':
      // FPM kind: a 64-bit scalar that is not an immediate, constant or
      // pointer, with both mangling flags set.
      Kind = FPM;
      Bitwidth = ElementBitwidth = 64;
      NumVectors = 0;
      Immediate = Constant = Pointer = false;
      ScalarForMangling = NoManglingQ = true;
      break;
    case '>':
      // Double the element width.
      assert(ElementBitwidth < 128);
      ElementBitwidth *= 2;
      break;
    case '<':
      // Halve the element width.
      assert(ElementBitwidth > 8);
      ElementBitwidth /= 2;
      break;
    case '1':
      // Scalar (zero vectors).
      NumVectors = 0;
      break;
    case '2':
      NumVectors = 2;
      break;
    case '3':
      NumVectors = 3;
      break;
    case '4':
      NumVectors = 4;
      break;
    case '*':
      Pointer = true;
      break;
    case 'c':
      Constant = true;
      break;
    case 'Q':
      // Force a 128-bit overall width.
      Bitwidth = 128;
      break;
    case 'q':
      // Force a 64-bit overall width.
      Bitwidth = 64;
      break;
    case 'I':
      // 32-bit signed scalar immediate.
      Kind = SInt;
      ElementBitwidth = Bitwidth = 32;
      NumVectors = 0;
      Immediate = true;
      break;
    case 'p':
      // Turn a poly type into the unsigned integer kind; other kinds are
      // left alone.
      if (isPoly())
        Kind = UInt;
      break;
    case '!':
      // Key type, handled elsewhere.
      break;
    default:
      llvm_unreachable("Unhandled character!");
    }
  }
}
992 
993 //===----------------------------------------------------------------------===//
994 // Intrinsic implementation
995 //===----------------------------------------------------------------------===//
996 
getNextModifiers(StringRef Proto,unsigned & Pos) const997 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
998   if (Proto.size() == Pos)
999     return StringRef();
1000   else if (Proto[Pos] != '(')
1001     return Proto.substr(Pos++, 1);
1002 
1003   size_t Start = Pos + 1;
1004   size_t End = Proto.find(')', Start);
1005   assert_with_loc(End != StringRef::npos, "unmatched modifier group paren");
1006   Pos = End + 1;
1007   return Proto.slice(Start, End);
1008 }
1009 
getInstTypeCode(Type T,ClassKind CK) const1010 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1011   char typeCode = '\0';
1012   bool printNumber = true;
1013 
1014   if (CK == ClassB && TargetGuard == "neon")
1015     return "";
1016 
1017   if (this->CK == ClassV)
1018     return "";
1019 
1020   if (T.isBFloat16())
1021     return "bf16";
1022 
1023   if (T.isMFloat8())
1024     return "mf8";
1025 
1026   if (T.isPoly())
1027     typeCode = 'p';
1028   else if (T.isInteger())
1029     typeCode = T.isSigned() ? 's' : 'u';
1030   else
1031     typeCode = 'f';
1032 
1033   if (CK == ClassI) {
1034     switch (typeCode) {
1035     default:
1036       break;
1037     case 's':
1038     case 'u':
1039     case 'p':
1040       typeCode = 'i';
1041       break;
1042     }
1043   }
1044   if (CK == ClassB && TargetGuard == "neon") {
1045     typeCode = '\0';
1046   }
1047 
1048   std::string S;
1049   if (typeCode != '\0')
1050     S.push_back(typeCode);
1051   if (printNumber)
1052     S += utostr(T.getElementSizeInBits());
1053 
1054   return S;
1055 }
1056 
getBuiltinTypeStr()1057 std::string Intrinsic::getBuiltinTypeStr() {
1058   ClassKind LocalCK = getClassKind(true);
1059   std::string S;
1060 
1061   Type RetT = getReturnType();
1062   if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
1063       !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
1064     RetT.makeInteger(RetT.getElementSizeInBits(), false);
1065 
1066   // Since the return value must be one type, return a vector type of the
1067   // appropriate width which we will bitcast.  An exception is made for
1068   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1069   // fashion, storing them to a pointer arg.
1070   if (RetT.getNumVectors() > 1) {
1071     S += "vv*"; // void result with void* first argument
1072   } else {
1073     if (RetT.isPoly())
1074       RetT.makeInteger(RetT.getElementSizeInBits(), false);
1075     if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1076       RetT.makeSigned();
1077 
1078     if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1079       // Cast to vector of 8-bit elements.
1080       RetT.makeInteger(8, true);
1081 
1082     S += RetT.builtin_str();
1083   }
1084 
1085   for (unsigned I = 0; I < getNumParams(); ++I) {
1086     Type T = getParamType(I);
1087     if (T.isPoly())
1088       T.makeInteger(T.getElementSizeInBits(), false);
1089 
1090     if (LocalCK == ClassB && !T.isScalar())
1091       T.makeInteger(8, true);
1092     // Halves always get converted to 8-bit elements.
1093     if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1094       T.makeInteger(8, true);
1095 
1096     if (LocalCK == ClassI && T.isInteger())
1097       T.makeSigned();
1098 
1099     if (isArgImmediate(I))
1100       T.makeImmediate(32);
1101 
1102     S += T.builtin_str();
1103   }
1104 
1105   // Extra constant integer to hold type class enum for this function, e.g. s8
1106   if (LocalCK == ClassB)
1107     S += "i";
1108 
1109   return S;
1110 }
1111 
getMangledName(bool ForceClassS) const1112 std::string Intrinsic::getMangledName(bool ForceClassS) const {
1113   // Check if the prototype has a scalar operand with the type of the vector
1114   // elements.  If not, bitcasting the args will take care of arg checking.
1115   // The actual signedness etc. will be taken care of with special enums.
1116   ClassKind LocalCK = CK;
1117   if (!protoHasScalar())
1118     LocalCK = ClassB;
1119 
1120   return mangleName(Name, ForceClassS ? ClassS : LocalCK);
1121 }
1122 
mangleName(std::string Name,ClassKind LocalCK) const1123 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1124   std::string typeCode = getInstTypeCode(BaseType, LocalCK);
1125   std::string S = Name;
1126 
1127   if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1128       Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
1129       Name == "vcvt_f32_bf16")
1130     return Name;
1131 
1132   if (!typeCode.empty()) {
1133     // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1134     if (Name.size() >= 3 && isdigit(Name.back()) &&
1135         Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1136       S.insert(S.length() - 3, "_" + typeCode);
1137     else
1138       S += "_" + typeCode;
1139   }
1140 
1141   if (BaseType != InBaseType) {
1142     // A reinterpret - out the input base type at the end.
1143     S += "_" + getInstTypeCode(InBaseType, LocalCK);
1144   }
1145 
1146   if (LocalCK == ClassB && TargetGuard == "neon")
1147     S += "_v";
1148 
1149   // Insert a 'q' before the first '_' character so that it ends up before
1150   // _lane or _n on vector-scalar operations.
1151   if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1152     size_t Pos = S.find('_');
1153     S.insert(Pos, "q");
1154   }
1155 
1156   char Suffix = '\0';
1157   if (BaseType.isScalarForMangling()) {
1158     switch (BaseType.getElementSizeInBits()) {
1159     case 8: Suffix = 'b'; break;
1160     case 16: Suffix = 'h'; break;
1161     case 32: Suffix = 's'; break;
1162     case 64: Suffix = 'd'; break;
1163     default: llvm_unreachable("Bad suffix!");
1164     }
1165   }
1166   if (Suffix != '\0') {
1167     size_t Pos = S.find('_');
1168     S.insert(Pos, &Suffix, 1);
1169   }
1170 
1171   return S;
1172 }
1173 
replaceParamsIn(std::string S)1174 std::string Intrinsic::replaceParamsIn(std::string S) {
1175   while (S.find('$') != std::string::npos) {
1176     size_t Pos = S.find('$');
1177     size_t End = Pos + 1;
1178     while (isalpha(S[End]))
1179       ++End;
1180 
1181     std::string VarName = S.substr(Pos + 1, End - Pos - 1);
1182     assert_with_loc(Variables.find(VarName) != Variables.end(),
1183                     "Variable not defined!");
1184     S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
1185   }
1186 
1187   return S;
1188 }
1189 
initVariables()1190 void Intrinsic::initVariables() {
1191   Variables.clear();
1192 
1193   // Modify the TypeSpec per-argument to get a concrete Type, and create
1194   // known variables for each.
1195   for (unsigned I = 1; I < Types.size(); ++I) {
1196     char NameC = '0' + (I - 1);
1197     std::string Name = "p";
1198     Name.push_back(NameC);
1199 
1200     Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1201   }
1202   RetVar = Variable(Types[0], "ret" + VariablePostfix);
1203 }
1204 
emitPrototype(StringRef NamePrefix)1205 void Intrinsic::emitPrototype(StringRef NamePrefix) {
1206   if (UseMacro) {
1207     OS << "#define ";
1208   } else {
1209     OS << "__ai ";
1210     if (TargetGuard != "")
1211       OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
1212     OS << Types[0].str() << " ";
1213   }
1214 
1215   OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
1216 
1217   for (unsigned I = 0; I < getNumParams(); ++I) {
1218     if (I != 0)
1219       OS << ", ";
1220 
1221     char NameC = '0' + I;
1222     std::string Name = "p";
1223     Name.push_back(NameC);
1224     assert(Variables.find(Name) != Variables.end());
1225     Variable &V = Variables[Name];
1226 
1227     if (!UseMacro)
1228       OS << V.getType().str() << " ";
1229     OS << V.getName();
1230   }
1231 
1232   OS << ")";
1233 }
1234 
emitOpeningBrace()1235 void Intrinsic::emitOpeningBrace() {
1236   if (UseMacro)
1237     OS << " __extension__ ({";
1238   else
1239     OS << " {";
1240   emitNewLine();
1241 }
1242 
emitClosingBrace()1243 void Intrinsic::emitClosingBrace() {
1244   if (UseMacro)
1245     OS << "})";
1246   else
1247     OS << "}";
1248 }
1249 
emitNewLine()1250 void Intrinsic::emitNewLine() {
1251   if (UseMacro)
1252     OS << " \\\n";
1253   else
1254     OS << "\n";
1255 }
1256 
emitReverseVariable(Variable & Dest,Variable & Src)1257 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1258   if (Dest.getType().getNumVectors() > 1) {
1259     emitNewLine();
1260 
1261     for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1262       OS << "  " << Dest.getName() << ".val[" << K << "] = "
1263          << "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], "
1264          << Src.getName() << ".val[" << K << "], __lane_reverse_"
1265          << Dest.getType().getSizeInBits() << "_"
1266          << Dest.getType().getElementSizeInBits() << ");";
1267       emitNewLine();
1268     }
1269   } else {
1270     OS << "  " << Dest.getName() << " = __builtin_shufflevector("
1271        << Src.getName() << ", " << Src.getName() << ", __lane_reverse_"
1272        << Dest.getType().getSizeInBits() << "_"
1273        << Dest.getType().getElementSizeInBits() << ");";
1274     emitNewLine();
1275   }
1276 }
1277 
emitArgumentReversal()1278 void Intrinsic::emitArgumentReversal() {
1279   if (isBigEndianSafe())
1280     return;
1281 
1282   // Reverse all vector arguments.
1283   for (unsigned I = 0; I < getNumParams(); ++I) {
1284     std::string Name = "p" + utostr(I);
1285     std::string NewName = "rev" + utostr(I);
1286 
1287     Variable &V = Variables[Name];
1288     Variable NewV(V.getType(), NewName + VariablePostfix);
1289 
1290     if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1291       continue;
1292 
1293     OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
1294     emitReverseVariable(NewV, V);
1295     V = NewV;
1296   }
1297 }
1298 
emitReturnVarDecl()1299 void Intrinsic::emitReturnVarDecl() {
1300   assert(RetVar.getType() == Types[0]);
1301   // Create a return variable, if we're not void.
1302   if (!RetVar.getType().isVoid()) {
1303     OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1304     emitNewLine();
1305   }
1306 }
1307 
emitReturnReversal()1308 void Intrinsic::emitReturnReversal() {
1309   if (isBigEndianSafe())
1310     return;
1311   if (!getReturnType().isVector() || getReturnType().isVoid() ||
1312       getReturnType().getNumElements() == 1)
1313     return;
1314   emitReverseVariable(RetVar, RetVar);
1315 }
1316 
emitShadowedArgs()1317 void Intrinsic::emitShadowedArgs() {
1318   // Macro arguments are not type-checked like inline function arguments,
1319   // so assign them to local temporaries to get the right type checking.
1320   if (!UseMacro)
1321     return;
1322 
1323   for (unsigned I = 0; I < getNumParams(); ++I) {
1324     // Do not create a temporary for an immediate argument.
1325     // That would defeat the whole point of using a macro!
1326     if (getParamType(I).isImmediate())
1327       continue;
1328     // Do not create a temporary for pointer arguments. The input
1329     // pointer may have an alignment hint.
1330     if (getParamType(I).isPointer())
1331       continue;
1332 
1333     std::string Name = "p" + utostr(I);
1334 
1335     assert(Variables.find(Name) != Variables.end());
1336     Variable &V = Variables[Name];
1337 
1338     std::string NewName = "s" + utostr(I);
1339     Variable V2(V.getType(), NewName + VariablePostfix);
1340 
1341     OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
1342        << V.getName() << ";";
1343     emitNewLine();
1344 
1345     V = V2;
1346   }
1347 }
1348 
protoHasScalar() const1349 bool Intrinsic::protoHasScalar() const {
1350   return any_of(Types,
1351                 [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1352 }
1353 
emitBodyAsBuiltinCall()1354 void Intrinsic::emitBodyAsBuiltinCall() {
1355   std::string S;
1356 
1357   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1358   // sret-like argument.
1359   bool SRet = getReturnType().getNumVectors() >= 2;
1360 
1361   StringRef N = Name;
1362   ClassKind LocalCK = CK;
1363   if (!protoHasScalar())
1364     LocalCK = ClassB;
1365 
1366   if (!getReturnType().isVoid() && !SRet)
1367     S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
1368 
1369   S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
1370 
1371   if (SRet)
1372     S += "&" + RetVar.getName() + ", ";
1373 
1374   for (unsigned I = 0; I < getNumParams(); ++I) {
1375     Variable &V = Variables["p" + utostr(I)];
1376     Type T = V.getType();
1377 
1378     // Handle multiple-vector values specially, emitting each subvector as an
1379     // argument to the builtin.
1380     if (T.getNumVectors() > 1) {
1381       // Check if an explicit cast is needed.
1382       std::string Cast;
1383       if (LocalCK == ClassB) {
1384         Type T2 = T;
1385         T2.makeOneVector();
1386         T2.makeInteger(8, /*Sign=*/true);
1387         Cast = "__builtin_bit_cast(" + T2.str() + ", ";
1388       }
1389 
1390       for (unsigned J = 0; J < T.getNumVectors(); ++J)
1391         S += Cast + V.getName() + ".val[" + utostr(J) + "]" +
1392              (Cast.empty() ? ", " : "), ");
1393       continue;
1394     }
1395 
1396     std::string Arg = V.getName();
1397     Type CastToType = T;
1398 
1399     // Check if an explicit cast is needed.
1400     if (CastToType.isVector()) {
1401       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
1402         CastToType.makeInteger(8, true);
1403         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404       } else if (LocalCK == ClassI) {
1405         if (CastToType.isInteger()) {
1406           CastToType.makeSigned();
1407           Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1408         }
1409       }
1410     }
1411 
1412     S += Arg + ", ";
1413   }
1414 
1415   // Extra constant integer to hold type class enum for this function, e.g. s8
1416   if (getClassKind(true) == ClassB) {
1417     S += utostr(getPolymorphicKeyType().getNeonEnum());
1418   } else {
1419     // Remove extraneous ", ".
1420     S.pop_back();
1421     S.pop_back();
1422   }
1423 
1424   if (!getReturnType().isVoid() && !SRet)
1425     S += ")";
1426   S += ");";
1427 
1428   std::string RetExpr;
1429   if (!SRet && !RetVar.getType().isVoid())
1430     RetExpr = RetVar.getName() + " = ";
1431 
1432   OS << "  " << RetExpr << S;
1433   emitNewLine();
1434 }
1435 
emitBody(StringRef CallPrefix)1436 void Intrinsic::emitBody(StringRef CallPrefix) {
1437   std::vector<std::string> Lines;
1438 
1439   if (!Body || Body->empty()) {
1440     // Nothing specific to output - must output a builtin.
1441     emitBodyAsBuiltinCall();
1442     return;
1443   }
1444 
1445   // We have a list of "things to output". The last should be returned.
1446   for (auto *I : Body->getElements()) {
1447     if (const auto *SI = dyn_cast<StringInit>(I)) {
1448       Lines.push_back(replaceParamsIn(SI->getAsString()));
1449     } else if (const auto *DI = dyn_cast<DagInit>(I)) {
1450       DagEmitter DE(*this, CallPrefix);
1451       Lines.push_back(DE.emitDag(DI).second + ";");
1452     }
1453   }
1454 
1455   assert(!Lines.empty() && "Empty def?");
1456   if (!RetVar.getType().isVoid())
1457     Lines.back().insert(0, RetVar.getName() + " = ");
1458 
1459   for (auto &L : Lines) {
1460     OS << "  " << L;
1461     emitNewLine();
1462   }
1463 }
1464 
emitReturn()1465 void Intrinsic::emitReturn() {
1466   if (RetVar.getType().isVoid())
1467     return;
1468   if (UseMacro)
1469     OS << "  " << RetVar.getName() << ";";
1470   else
1471     OS << "  return " << RetVar.getName() << ";";
1472   emitNewLine();
1473 }
1474 
emitDag(const DagInit * DI)1475 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
1476   // At this point we should only be seeing a def.
1477   const DefInit *DefI = cast<DefInit>(DI->getOperator());
1478   std::string Op = DefI->getAsString();
1479 
1480   if (Op == "cast" || Op == "bitcast")
1481     return emitDagCast(DI, Op == "bitcast");
1482   if (Op == "shuffle")
1483     return emitDagShuffle(DI);
1484   if (Op == "dup")
1485     return emitDagDup(DI);
1486   if (Op == "dup_typed")
1487     return emitDagDupTyped(DI);
1488   if (Op == "splat")
1489     return emitDagSplat(DI);
1490   if (Op == "save_temp")
1491     return emitDagSaveTemp(DI);
1492   if (Op == "op")
1493     return emitDagOp(DI);
1494   if (Op == "call" || Op == "call_mangled")
1495     return emitDagCall(DI, Op == "call_mangled");
1496   if (Op == "name_replace")
1497     return emitDagNameReplace(DI);
1498   if (Op == "literal")
1499     return emitDagLiteral(DI);
1500   assert_with_loc(false, "Unknown operation!");
1501   return std::make_pair(Type::getVoid(), "");
1502 }
1503 
1504 std::pair<Type, std::string>
emitDagOp(const DagInit * DI)1505 Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
1506   std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1507   if (DI->getNumArgs() == 2) {
1508     // Unary op.
1509     std::pair<Type, std::string> R =
1510         emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
1511     return std::make_pair(R.first, Op + R.second);
1512   } else {
1513     assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1514     std::pair<Type, std::string> R1 =
1515         emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
1516     std::pair<Type, std::string> R2 =
1517         emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));
1518     assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
1519     return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
1520   }
1521 }
1522 
1523 std::pair<Type, std::string>
emitDagCall(const DagInit * DI,bool MatchMangledName)1524 Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) {
1525   std::vector<Type> Types;
1526   std::vector<std::string> Values;
1527   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1528     std::pair<Type, std::string> R =
1529         emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
1530     Types.push_back(R.first);
1531     Values.push_back(R.second);
1532   }
1533 
1534   // Look up the called intrinsic.
1535   std::string N;
1536   if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0)))
1537     N = SI->getAsUnquotedString();
1538   else
1539     N = emitDagArg(DI->getArg(0), "").second;
1540   std::optional<std::string> MangledName;
1541   if (MatchMangledName) {
1542     if (Intr.getRecord()->getValueAsString("Name").contains("laneq"))
1543       N += "q";
1544     MangledName = Intr.mangleName(N, ClassS);
1545   }
1546   Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);
1547 
1548   // Make sure the callee is known as an early def.
1549   Callee.setNeededEarly();
1550   Intr.Dependencies.insert(&Callee);
1551 
1552   // Now create the call itself.
1553   std::string S;
1554   if (!Callee.isBigEndianSafe())
1555     S += CallPrefix.str();
1556   S += Callee.getMangledName(true) + "(";
1557   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1558     if (I != 0)
1559       S += ", ";
1560     S += Values[I];
1561   }
1562   S += ")";
1563 
1564   return std::make_pair(Callee.getReturnType(), S);
1565 }
1566 
1567 std::pair<Type, std::string>
emitDagCast(const DagInit * DI,bool IsBitCast)1568 Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) {
1569   // (cast MOD* VAL) -> cast VAL to type given by MOD.
1570   std::pair<Type, std::string> R =
1571       emitDagArg(DI->getArg(DI->getNumArgs() - 1),
1572                  std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
1573   Type castToType = R.first;
1574   for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1575 
1576     // MOD can take several forms:
1577     //   1. $X - take the type of parameter / variable X.
1578     //   2. The value "R" - take the type of the return type.
1579     //   3. a type string
1580     //   4. The value "U" or "S" to switch the signedness.
1581     //   5. The value "H" or "D" to half or double the bitwidth.
1582     //   6. The value "8" to convert to 8-bit (signed) integer lanes.
1583     if (!DI->getArgNameStr(ArgIdx).empty()) {
1584       assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
1585                           Intr.Variables.end(),
1586                       "Variable not found");
1587       castToType =
1588           Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
1589     } else {
1590       const auto *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
1591       assert_with_loc(SI, "Expected string type or $Name for cast type");
1592 
1593       if (SI->getAsUnquotedString() == "R") {
1594         castToType = Intr.getReturnType();
1595       } else if (SI->getAsUnquotedString() == "U") {
1596         castToType.makeUnsigned();
1597       } else if (SI->getAsUnquotedString() == "S") {
1598         castToType.makeSigned();
1599       } else if (SI->getAsUnquotedString() == "H") {
1600         castToType.halveLanes();
1601       } else if (SI->getAsUnquotedString() == "D") {
1602         castToType.doubleLanes();
1603       } else if (SI->getAsUnquotedString() == "8") {
1604         castToType.makeInteger(8, true);
1605       } else if (SI->getAsUnquotedString() == "32") {
1606         castToType.make32BitElement();
1607       } else {
1608         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
1609         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
1610       }
1611     }
1612   }
1613 
1614   std::string S;
1615   if (IsBitCast)
1616     S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
1617   else
1618     S = "(" + castToType.str() + ")(" + R.second + ")";
1619 
1620   return std::make_pair(castToType, S);
1621 }
1622 
/// Emit (shuffle arg1, arg2, sequence) as a __builtin_shufflevector call.
///
/// Both vector arguments must have the same type. The mask sequence is
/// evaluated with a SetTheory extended by the local operators below; every
/// resulting record must be named "sv<N>", and <N> becomes one mask index.
/// The returned type is arg1's type, with its lanes doubled or halved when
/// the mask has twice or half as many entries as arg1 has elements.
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
  // See the documentation in arm_neon.td for a description of these operators.

  // "lowhalf": keeps the first half of the elements its arguments evaluate to.
  class LowHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };

  // "highhalf": keeps the second half of the elements its arguments evaluate
  // to.
  class HighHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };

  // "rev": the first argument is an integer bit width; divided by the element
  // size it gives a group length, and the remaining elements are reversed
  // within each consecutive group of that length.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}

    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      // Argument 0 is the width, so only the arguments after it are elements.
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      // Convert the width in bits into a number of elements per group.
      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<const Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert_range(Revved);
    }
  };

  // Expands the records "mask0" and "mask1" into N consecutive "sv" defs:
  // sv0..sv(N-1) for mask0 and svN..sv(2N-1) for mask1. Any other record is
  // left unexpanded.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}

    void expand(SetTheory &ST, const Record *R,
                SetTheory::RecSet &Elts) override {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the mask sequence; "rev" and the mask expander are parameterised
  // by arg1's element size and element count respectively.
  SetTheory ST;
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Elts, {});

  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.starts_with("sv"),
                    "Incorrect element kind in shuffle mask!");
    // The text after the "sv" prefix is the mask index.
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
1734 
1735 std::pair<Type, std::string>
emitDagDup(const DagInit * DI)1736 Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
1737   assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
1738   std::pair<Type, std::string> A =
1739       emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
1740   assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
1741 
1742   Type T = Intr.getBaseType();
1743   assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
1744   std::string S = "(" + T.str() + ") {";
1745   for (unsigned I = 0; I < T.getNumElements(); ++I) {
1746     if (I != 0)
1747       S += ", ";
1748     S += A.second;
1749   }
1750   S += "}";
1751 
1752   return std::make_pair(T, S);
1753 }
1754 
1755 std::pair<Type, std::string>
emitDagDupTyped(const DagInit * DI)1756 Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
1757   assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
1758   std::pair<Type, std::string> B =
1759       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
1760   assert_with_loc(B.first.isScalar(),
1761                   "dup_typed() requires a scalar as the second argument");
1762   Type T;
1763   // If the type argument is a constant string, construct the type directly.
1764   if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) {
1765     T = Type::fromTypedefName(SI->getAsUnquotedString());
1766     assert_with_loc(!T.isVoid(), "Unknown typedef");
1767   } else
1768     T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;
1769 
1770   assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
1771   std::string S = "(" + T.str() + ") {";
1772   for (unsigned I = 0; I < T.getNumElements(); ++I) {
1773     if (I != 0)
1774       S += ", ";
1775     S += B.second;
1776   }
1777   S += "}";
1778 
1779   return std::make_pair(T, S);
1780 }
1781 
1782 std::pair<Type, std::string>
emitDagSplat(const DagInit * DI)1783 Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
1784   assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
1785   std::pair<Type, std::string> A =
1786       emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
1787   std::pair<Type, std::string> B =
1788       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
1789 
1790   assert_with_loc(B.first.isScalar(),
1791                   "splat() requires a scalar int as the second argument");
1792 
1793   std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1794   for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1795     S += ", " + B.second;
1796   }
1797   S += ")";
1798 
1799   return std::make_pair(Intr.getBaseType(), S);
1800 }
1801 
1802 std::pair<Type, std::string>
emitDagSaveTemp(const DagInit * DI)1803 Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
1804   assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
1805   std::pair<Type, std::string> A =
1806       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
1807 
1808   assert_with_loc(!A.first.isVoid(),
1809                   "Argument to save_temp() must have non-void type!");
1810 
1811   std::string N = std::string(DI->getArgNameStr(0));
1812   assert_with_loc(!N.empty(),
1813                   "save_temp() expects a name as the first argument");
1814 
1815   auto [It, Inserted] =
1816       Intr.Variables.try_emplace(N, A.first, N + Intr.VariablePostfix);
1817   assert_with_loc(Inserted, "Variable already defined!");
1818 
1819   std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second;
1820 
1821   return std::make_pair(Type::getVoid(), S);
1822 }
1823 
1824 std::pair<Type, std::string>
emitDagNameReplace(const DagInit * DI)1825 Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
1826   std::string S = Intr.Name;
1827 
1828   assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
1829   std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1830   std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1831 
1832   size_t Idx = S.find(ToReplace);
1833 
1834   assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
1835   S.replace(Idx, ToReplace.size(), ReplaceWith);
1836 
1837   return std::make_pair(Type::getVoid(), S);
1838 }
1839 
1840 std::pair<Type, std::string>
emitDagLiteral(const DagInit * DI)1841 Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
1842   std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1843   std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1844   return std::make_pair(Type::fromTypedefName(Ty), Value);
1845 }
1846 
1847 std::pair<Type, std::string>
emitDagArg(const Init * Arg,std::string ArgName)1848 Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
1849   if (!ArgName.empty()) {
1850     assert_with_loc(!Arg->isComplete(),
1851                     "Arguments must either be DAGs or names, not both!");
1852     assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
1853                     "Variable not defined!");
1854     Variable &V = Intr.Variables[ArgName];
1855     return std::make_pair(V.getType(), V.getName());
1856   }
1857 
1858   assert(Arg && "Neither ArgName nor Arg?!");
1859   const auto *DI = dyn_cast<DagInit>(Arg);
1860   assert_with_loc(DI, "Arguments must either be DAGs or names!");
1861 
1862   return emitDag(DI);
1863 }
1864 
generate()1865 std::string Intrinsic::generate() {
1866   // Avoid duplicated code for big and little endian
1867   if (isBigEndianSafe()) {
1868     generateImpl(false, "", "");
1869     return OS.str();
1870   }
1871   // Little endian intrinsics are simple and don't require any argument
1872   // swapping.
1873   OS << "#ifdef __LITTLE_ENDIAN__\n";
1874 
1875   generateImpl(false, "", "");
1876 
1877   OS << "#else\n";
1878 
1879   // Big endian intrinsics are more complex. The user intended these intrinsics
1880   // to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for
1881   // 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but
1882   // we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap
1883   // all arguments and swap the return value too.
1884   //
1885   // If we call sub-intrinsics, we should call a version that does
1886   // not re-swap the arguments!
1887   generateImpl(true, "", "__noswap_");
1888 
1889   // If we're needed early, create a non-swapping variant for
1890   // big-endian.
1891   if (NeededEarly) {
1892     generateImpl(false, "__noswap_", "__noswap_");
1893   }
1894   OS << "#endif\n\n";
1895 
1896   return OS.str();
1897 }
1898 
generateImpl(bool ReverseArguments,StringRef NamePrefix,StringRef CallPrefix)1899 void Intrinsic::generateImpl(bool ReverseArguments,
1900                              StringRef NamePrefix, StringRef CallPrefix) {
1901   CurrentRecord = R;
1902 
1903   // If we call a macro, our local variables may be corrupted due to
1904   // lack of proper lexical scoping. So, add a globally unique postfix
1905   // to every variable.
1906   //
1907   // indexBody() should have set up the Dependencies set by now.
1908   for (auto *I : Dependencies)
1909     if (I->UseMacro) {
1910       VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
1911       break;
1912     }
1913 
1914   initVariables();
1915 
1916   emitPrototype(NamePrefix);
1917 
1918   if (IsUnavailable) {
1919     OS << " __attribute__((unavailable));";
1920   } else {
1921     emitOpeningBrace();
1922     // Emit return variable declaration first as to not trigger
1923     // -Wdeclaration-after-statement.
1924     emitReturnVarDecl();
1925     emitShadowedArgs();
1926     if (ReverseArguments)
1927       emitArgumentReversal();
1928     emitBody(CallPrefix);
1929     if (ReverseArguments)
1930       emitReturnReversal();
1931     emitReturn();
1932     emitClosingBrace();
1933   }
1934   OS << "\n";
1935 
1936   CurrentRecord = nullptr;
1937 }
1938 
indexBody()1939 void Intrinsic::indexBody() {
1940   CurrentRecord = R;
1941 
1942   initVariables();
1943   // Emit return variable declaration first as to not trigger
1944   // -Wdeclaration-after-statement.
1945   emitReturnVarDecl();
1946   emitBody("");
1947   OS.str("");
1948 
1949   CurrentRecord = nullptr;
1950 }
1951 
1952 //===----------------------------------------------------------------------===//
1953 // NeonEmitter implementation
1954 //===----------------------------------------------------------------------===//
1955 
getIntrinsic(StringRef Name,ArrayRef<Type> Types,std::optional<std::string> MangledName)1956 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
1957                                      std::optional<std::string> MangledName) {
1958   // First, look up the name in the intrinsic map.
1959   assert_with_loc(IntrinsicMap.find(Name) != IntrinsicMap.end(),
1960                   ("Intrinsic '" + Name + "' not found!").str());
1961   auto &V = IntrinsicMap.find(Name)->second;
1962   std::vector<Intrinsic *> GoodVec;
1963 
1964   // Create a string to print if we end up failing.
1965   std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1966   for (unsigned I = 0; I < Types.size(); ++I) {
1967     if (I != 0)
1968       ErrMsg += ", ";
1969     ErrMsg += Types[I].str();
1970   }
1971   ErrMsg += ")'\n";
1972   ErrMsg += "Available overloads:\n";
1973 
1974   // Now, look through each intrinsic implementation and see if the types are
1975   // compatible.
1976   for (auto &I : V) {
1977     ErrMsg += "  - " + I.getReturnType().str() + " " + I.getMangledName();
1978     ErrMsg += "(";
1979     for (unsigned A = 0; A < I.getNumParams(); ++A) {
1980       if (A != 0)
1981         ErrMsg += ", ";
1982       ErrMsg += I.getParamType(A).str();
1983     }
1984     ErrMsg += ")\n";
1985 
1986     if (MangledName && MangledName != I.getMangledName(true))
1987       continue;
1988 
1989     if (I.getNumParams() != Types.size())
1990       continue;
1991 
1992     unsigned ArgNum = 0;
1993     bool MatchingArgumentTypes = all_of(Types, [&](const auto &Type) {
1994       return Type == I.getParamType(ArgNum++);
1995     });
1996 
1997     if (MatchingArgumentTypes)
1998       GoodVec.push_back(&I);
1999   }
2000 
2001   assert_with_loc(!GoodVec.empty(),
2002                   "No compatible intrinsic found - " + ErrMsg);
2003   assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
2004 
2005   return *GoodVec.front();
2006 }
2007 
createIntrinsic(const Record * R,SmallVectorImpl<Intrinsic * > & Out)2008 void NeonEmitter::createIntrinsic(const Record *R,
2009                                   SmallVectorImpl<Intrinsic *> &Out) {
2010   std::string Name = std::string(R->getValueAsString("Name"));
2011   std::string Proto = std::string(R->getValueAsString("Prototype"));
2012   std::string Types = std::string(R->getValueAsString("Types"));
2013   const Record *OperationRec = R->getValueAsDef("Operation");
2014   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
2015   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
2016   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
2017   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
2018   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
2019 
2020   // Set the global current record. This allows assert_with_loc to produce
2021   // decent location information even when highly nested.
2022   CurrentRecord = R;
2023 
2024   const ListInit *Body = OperationRec->getValueAsListInit("Ops");
2025 
2026   std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
2027 
2028   ClassKind CK = ClassNone;
2029   if (!R->getDirectSuperClasses().empty())
2030     CK = ClassMap[R->getDirectSuperClasses()[0].first];
2031 
2032   std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2033   if (!CartesianProductWith.empty()) {
2034     std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
2035     for (auto TS : TypeSpecs) {
2036       Type DefaultT(TS, ".");
2037       for (auto SrcTS : ProductTypeSpecs) {
2038         Type DefaultSrcT(SrcTS, ".");
2039         if (TS == SrcTS ||
2040             DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2041           continue;
2042         NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
2043       }
2044     }
2045   } else {
2046     for (auto TS : TypeSpecs) {
2047       NewTypeSpecs.push_back(std::make_pair(TS, TS));
2048     }
2049   }
2050 
2051   sort(NewTypeSpecs);
2052   NewTypeSpecs.erase(llvm::unique(NewTypeSpecs), NewTypeSpecs.end());
2053   auto &Entry = IntrinsicMap[Name];
2054 
2055   for (auto &I : NewTypeSpecs) {
2056 
2057     // MFloat8 type is only available on AArch64. If encountered set ArchGuard
2058     // correctly.
2059     std::string NewArchGuard = ArchGuard;
2060     if (Type(I.first, ".").isMFloat8()) {
2061       if (NewArchGuard.empty()) {
2062         NewArchGuard = "defined(__aarch64__)";
2063       } else if (NewArchGuard.find("defined(__aarch64__)") ==
2064                  std::string::npos) {
2065         NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
2066       }
2067     }
2068     Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
2069                        NewArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
2070     Out.push_back(&Entry.back());
2071   }
2072 
2073   CurrentRecord = nullptr;
2074 }
2075 
2076 /// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
2077 /// declarations.
genBuiltinsDef(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)2078 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2079                                  SmallVectorImpl<Intrinsic *> &Defs) {
2080   // We only want to emit a builtin once, and in order of its name.
2081   std::map<std::string, Intrinsic *> Builtins;
2082 
2083   llvm::StringToOffsetTable Table;
2084   Table.GetOrAddStringOffset("");
2085   Table.GetOrAddStringOffset("n");
2086 
2087   for (auto *Def : Defs) {
2088     if (Def->hasBody())
2089       continue;
2090 
2091     if (Builtins.insert({Def->getMangledName(), Def}).second) {
2092       Table.GetOrAddStringOffset(Def->getMangledName());
2093       Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
2094       Table.GetOrAddStringOffset(Def->getTargetGuard());
2095     }
2096   }
2097 
2098   OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
2099   for (const auto &[Name, Def] : Builtins) {
2100     OS << "  BI__builtin_neon_" << Name << ",\n";
2101   }
2102   OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
2103 
2104   OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
2105   Table.EmitStringTableDef(OS, "BuiltinStrings");
2106   OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
2107 
2108   OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
2109   for (const auto &[Name, Def] : Builtins) {
2110     OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
2111        << Table.GetStringOffset(Def->getMangledName()) << " /* "
2112        << Def->getMangledName() << " */, ";
2113     OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
2114        << Def->getBuiltinTypeStr() << " */, ";
2115     OS << Table.GetStringOffset("n") << " /* n */, ";
2116     OS << Table.GetStringOffset(Def->getTargetGuard()) << " /* "
2117        << Def->getTargetGuard() << " */}, ";
2118     OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
2119   }
2120   OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
2121 }
2122 
genStreamingSVECompatibleList(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)2123 void NeonEmitter::genStreamingSVECompatibleList(
2124     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2125   OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2126 
2127   std::set<std::string> Emitted;
2128   for (auto *Def : Defs) {
2129     // If the def has a body (that is, it has Operation DAGs), it won't call
2130     // __builtin_neon_* so we don't need to generate a definition for it.
2131     if (Def->hasBody())
2132       continue;
2133 
2134     std::string Name = Def->getMangledName();
2135     if (Emitted.find(Name) != Emitted.end())
2136       continue;
2137 
2138     // FIXME: We should make exceptions here for some NEON builtins that are
2139     // permitted in streaming mode.
2140     OS << "case NEON::BI__builtin_neon_" << Name
2141        << ": BuiltinType = ArmNonStreaming; break;\n";
2142     Emitted.insert(Name);
2143   }
2144   OS << "#endif\n\n";
2145 }
2146 
2147 /// Generate the ARM and AArch64 overloaded type checking code for
2148 /// SemaChecking.cpp, checking for unique builtin declarations.
genOverloadTypeCheckCode(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)2149 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2150                                            SmallVectorImpl<Intrinsic *> &Defs) {
2151   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2152 
2153   // We record each overload check line before emitting because subsequent Inst
2154   // definitions may extend the number of permitted types (i.e. augment the
2155   // Mask). Use std::map to avoid sorting the table by hash number.
2156   struct OverloadInfo {
2157     uint64_t Mask = 0ULL;
2158     int PtrArgNum = 0;
2159     bool HasConstPtr = false;
2160     OverloadInfo() = default;
2161   };
2162   std::map<std::string, OverloadInfo> OverloadMap;
2163 
2164   for (auto *Def : Defs) {
2165     // If the def has a body (that is, it has Operation DAGs), it won't call
2166     // __builtin_neon_* so we don't need to generate a definition for it.
2167     if (Def->hasBody())
2168       continue;
2169     // Functions which have a scalar argument cannot be overloaded, no need to
2170     // check them if we are emitting the type checking code.
2171     if (Def->protoHasScalar())
2172       continue;
2173 
2174     uint64_t Mask = 0ULL;
2175     Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
2176 
2177     // Check if the function has a pointer or const pointer argument.
2178     int PtrArgNum = -1;
2179     bool HasConstPtr = false;
2180     for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2181       const auto &Type = Def->getParamType(I);
2182       if (Type.isPointer()) {
2183         PtrArgNum = I;
2184         HasConstPtr = Type.isConstPointer();
2185       }
2186     }
2187 
2188     // For sret builtins, adjust the pointer argument index.
2189     if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2190       PtrArgNum += 1;
2191 
2192     std::string Name = Def->getName();
2193     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2194     // vst1_lane, vldap1_lane, and vstl1_lane intrinsics.  Using a pointer to
2195     // the vector element type with one of those operations causes codegen to
2196     // select an aligned load/store instruction.  If you want an unaligned
2197     // operation, the pointer argument needs to have less alignment than element
2198     // type, so just accept any pointer type.
2199     if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
2200         Name == "vldap1_lane" || Name == "vstl1_lane") {
2201       PtrArgNum = -1;
2202       HasConstPtr = false;
2203     }
2204 
2205     if (Mask) {
2206       OverloadInfo &OI = OverloadMap[Def->getMangledName()];
2207       OI.Mask |= Mask;
2208       OI.PtrArgNum |= PtrArgNum;
2209       OI.HasConstPtr = HasConstPtr;
2210     }
2211   }
2212 
2213   for (auto &I : OverloadMap) {
2214     OverloadInfo &OI = I.second;
2215 
2216     OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2217     OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
2218     if (OI.PtrArgNum >= 0)
2219       OS << "; PtrArgNum = " << OI.PtrArgNum;
2220     if (OI.HasConstPtr)
2221       OS << "; HasConstPtr = true";
2222     OS << "; break;\n";
2223   }
2224   OS << "#endif\n\n";
2225 }
2226 
2227 inline bool
areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,const ArrayRef<ImmCheck> ChecksB)2228 NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
2229                                       const ArrayRef<ImmCheck> ChecksB) {
2230   // If multiple intrinsics map to the same builtin, we must ensure that the
2231   // intended range checks performed in SemaArm.cpp do not contradict each
2232   // other, as these are emitted once per-buitlin.
2233   //
2234   // The arguments to be checked and type of each check to be performed must be
2235   // the same. The element types may differ as they will be resolved
2236   // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
2237   // are not and so must be the same.
2238   bool compat =
2239       std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
2240                  [](const auto &A, const auto &B) {
2241                    return A.getImmArgIdx() == B.getImmArgIdx() &&
2242                           A.getKind() == B.getKind() &&
2243                           A.getVecSizeInBits() == B.getVecSizeInBits();
2244                  });
2245 
2246   return compat;
2247 }
2248 
genIntrinsicRangeCheckCode(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)2249 void NeonEmitter::genIntrinsicRangeCheckCode(
2250     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2251   std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
2252 
2253   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2254   for (auto &Def : Defs) {
2255     // If the Def has a body (operation DAGs), it is not a __builtin_neon_
2256     if (Def->hasBody() || !Def->hasImmediate())
2257       continue;
2258 
2259     // Sorted by immediate argument index
2260     ArrayRef<ImmCheck> Checks = Def->getImmChecks();
2261 
2262     auto [It, Inserted] = Emitted.try_emplace(Def->getMangledName(), Checks);
2263     if (!Inserted) {
2264       assert(areRangeChecksCompatible(Checks, It->second) &&
2265              "Neon intrinsics with incompatible immediate range checks cannot "
2266              "share a builtin.");
2267       continue; // Ensure this is emitted only once
2268     }
2269 
2270     // Emit builtin's range checks
2271     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
2272     for (const auto &Check : Checks) {
2273       OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
2274          << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
2275          << Check.getVecSizeInBits() << ");\n"
2276          << " break;\n";
2277     }
2278   }
2279 
2280   OS << "#endif\n\n";
2281 }
2282 
2283 /// runHeader - Emit a file with sections defining:
2284 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2285 /// 2. the SemaChecking code for the type overload checking.
2286 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)2287 void NeonEmitter::runHeader(raw_ostream &OS) {
2288   SmallVector<Intrinsic *, 128> Defs;
2289   for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
2290     createIntrinsic(R, Defs);
2291 
2292   // Generate shared BuiltinsXXX.def
2293   genBuiltinsDef(OS, Defs);
2294 
2295   // Generate ARM overloaded type checking code for SemaChecking.cpp
2296   genOverloadTypeCheckCode(OS, Defs);
2297 
2298   genStreamingSVECompatibleList(OS, Defs);
2299 
2300   // Generate ARM range checking code for shift/lane immediates.
2301   genIntrinsicRangeCheckCode(OS, Defs);
2302 }
2303 
emitNeonTypeDefs(const std::string & types,raw_ostream & OS)2304 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
2305   std::string TypedefTypes(types);
2306   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
2307 
2308   // Emit vector typedefs.
2309   bool InIfdef = false;
2310   for (auto &TS : TDTypeVec) {
2311     bool IsA64 = false;
2312     Type T(TS, ".");
2313     if (T.isDouble() || T.isMFloat8())
2314       IsA64 = true;
2315 
2316     if (InIfdef && !IsA64) {
2317       OS << "#endif\n";
2318       InIfdef = false;
2319     }
2320     if (!InIfdef && IsA64) {
2321       OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2322       InIfdef = true;
2323     }
2324 
2325     if (T.isPoly())
2326       OS << "typedef __attribute__((neon_polyvector_type(";
2327     else
2328       OS << "typedef __attribute__((neon_vector_type(";
2329 
2330     Type T2 = T;
2331     T2.makeScalar();
2332     OS << T.getNumElements();
2333     OS << "))) " << T2.str();
2334     OS << " " << T.str() << ";\n";
2335   }
2336   if (InIfdef)
2337     OS << "#endif\n";
2338   OS << "\n";
2339 
2340   // Emit struct typedefs.
2341   InIfdef = false;
2342   for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2343     for (auto &TS : TDTypeVec) {
2344       bool IsA64 = false;
2345       Type T(TS, ".");
2346       if (T.isDouble() || T.isMFloat8())
2347         IsA64 = true;
2348 
2349       if (InIfdef && !IsA64) {
2350         OS << "#endif\n";
2351         InIfdef = false;
2352       }
2353       if (!InIfdef && IsA64) {
2354         OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2355         InIfdef = true;
2356       }
2357 
2358       const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
2359       Type VT(TS, Mods);
2360       OS << "typedef struct " << VT.str() << " {\n";
2361       OS << "  " << T.str() << " val";
2362       OS << "[" << NumMembers << "]";
2363       OS << ";\n} ";
2364       OS << VT.str() << ";\n";
2365       OS << "\n";
2366     }
2367   }
2368   if (InIfdef)
2369     OS << "#endif\n";
2370 }
2371 
2372 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2373 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)2374 void NeonEmitter::run(raw_ostream &OS) {
2375   OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2376         "------------------------------"
2377         "---===\n"
2378         " *\n"
2379         " * Permission is hereby granted, free of charge, to any person "
2380         "obtaining "
2381         "a copy\n"
2382         " * of this software and associated documentation files (the "
2383         "\"Software\"),"
2384         " to deal\n"
2385         " * in the Software without restriction, including without limitation "
2386         "the "
2387         "rights\n"
2388         " * to use, copy, modify, merge, publish, distribute, sublicense, "
2389         "and/or sell\n"
2390         " * copies of the Software, and to permit persons to whom the Software "
2391         "is\n"
2392         " * furnished to do so, subject to the following conditions:\n"
2393         " *\n"
2394         " * The above copyright notice and this permission notice shall be "
2395         "included in\n"
2396         " * all copies or substantial portions of the Software.\n"
2397         " *\n"
2398         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2399         "EXPRESS OR\n"
2400         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2401         "MERCHANTABILITY,\n"
2402         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2403         "SHALL THE\n"
2404         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2405         "OTHER\n"
2406         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2407         "ARISING FROM,\n"
2408         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2409         "DEALINGS IN\n"
2410         " * THE SOFTWARE.\n"
2411         " *\n"
2412         " *===-----------------------------------------------------------------"
2413         "---"
2414         "---===\n"
2415         " */\n\n";
2416 
2417   OS << "#ifndef __ARM_NEON_H\n";
2418   OS << "#define __ARM_NEON_H\n\n";
2419 
2420   OS << "#if !defined(__arm__) && !defined(__aarch64__) && "
2421         "!defined(__arm64ec__)\n";
2422   OS << "#error \"<arm_neon.h> is intended only for ARM and AArch64 "
2423         "targets\"\n";
2424   OS << "#elif !defined(__ARM_FP)\n";
2425   OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2426         "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2427   OS << "#else\n\n";
2428 
2429   OS << "#include <stdint.h>\n\n";
2430 
2431   OS << "#include <arm_bf16.h>\n";
2432 
2433   OS << "#include <arm_vector_types.h>\n";
2434 
2435   // For now, signedness of polynomial types depends on target
2436   OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2437   OS << "typedef uint8_t poly8_t;\n";
2438   OS << "typedef uint16_t poly16_t;\n";
2439   OS << "typedef uint64_t poly64_t;\n";
2440   OS << "typedef __uint128_t poly128_t;\n";
2441   OS << "#else\n";
2442   OS << "typedef int8_t poly8_t;\n";
2443   OS << "typedef int16_t poly16_t;\n";
2444   OS << "typedef int64_t poly64_t;\n";
2445   OS << "#endif\n";
2446   emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS);
2447 
2448   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2449         "__nodebug__))\n\n";
2450 
2451   // Shufflevector arguments lists for endian-swapping vectors for big-endian
2452   // targets. For AArch64, we need to reverse every lane in the vector, but for
2453   // AArch32 we need to reverse the lanes within each 64-bit chunk of the
2454   // vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
2455   // the length of the vector in bits, and <m> is length of each lane in bits.
2456   OS << "#if !defined(__LITTLE_ENDIAN__)\n";
2457   OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2458   OS << "#define __lane_reverse_64_32 1,0\n";
2459   OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2460   OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2461   OS << "#define __lane_reverse_128_64 1,0\n";
2462   OS << "#define __lane_reverse_128_32 3,2,1,0\n";
2463   OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
2464   OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
2465   OS << "#else\n";
2466   OS << "#define __lane_reverse_64_32 1,0\n";
2467   OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2468   OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2469   OS << "#define __lane_reverse_128_64 0,1\n";
2470   OS << "#define __lane_reverse_128_32 1,0,3,2\n";
2471   OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
2472   OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
2473   OS << "#endif\n";
2474   OS << "#endif\n";
2475 
2476   SmallVector<Intrinsic *, 128> Defs;
2477   for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
2478     createIntrinsic(R, Defs);
2479 
2480   for (auto *I : Defs)
2481     I->indexBody();
2482 
2483   stable_sort(Defs, deref<std::less<>>());
2484 
2485   // Only emit a def when its requirements have been met.
2486   // FIXME: This loop could be made faster, but it's fast enough for now.
2487   bool MadeProgress = true;
2488   std::string InGuard;
2489   while (!Defs.empty() && MadeProgress) {
2490     MadeProgress = false;
2491 
2492     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2493          I != Defs.end(); /*No step*/) {
2494       bool DependenciesSatisfied = true;
2495       for (auto *II : (*I)->getDependencies()) {
2496         if (is_contained(Defs, II))
2497           DependenciesSatisfied = false;
2498       }
2499       if (!DependenciesSatisfied) {
2500         // Try the next one.
2501         ++I;
2502         continue;
2503       }
2504 
2505       // Emit #endif/#if pair if needed.
2506       if ((*I)->getArchGuard() != InGuard) {
2507         if (!InGuard.empty())
2508           OS << "#endif\n";
2509         InGuard = (*I)->getArchGuard();
2510         if (!InGuard.empty())
2511           OS << "#if " << InGuard << "\n";
2512       }
2513 
2514       // Actually generate the intrinsic code.
2515       OS << (*I)->generate();
2516 
2517       MadeProgress = true;
2518       I = Defs.erase(I);
2519     }
2520   }
2521   assert(Defs.empty() && "Some requirements were not satisfied!");
2522   if (!InGuard.empty())
2523     OS << "#endif\n";
2524 
2525   OS << "\n";
2526   OS << "#undef __ai\n\n";
2527   OS << "#endif /* if !defined(__ARM_NEON) */\n";
2528   OS << "#endif /* ifndef __ARM_FP */\n";
2529 }
2530 
2531 /// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
2532 /// is comprised of type definitions and function declarations.
runFP16(raw_ostream & OS)2533 void NeonEmitter::runFP16(raw_ostream &OS) {
2534   OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2535         "------------------------------"
2536         "---===\n"
2537         " *\n"
2538         " * Permission is hereby granted, free of charge, to any person "
2539         "obtaining a copy\n"
2540         " * of this software and associated documentation files (the "
2541 				"\"Software\"), to deal\n"
2542         " * in the Software without restriction, including without limitation "
2543 				"the rights\n"
2544         " * to use, copy, modify, merge, publish, distribute, sublicense, "
2545 				"and/or sell\n"
2546         " * copies of the Software, and to permit persons to whom the Software "
2547 				"is\n"
2548         " * furnished to do so, subject to the following conditions:\n"
2549         " *\n"
2550         " * The above copyright notice and this permission notice shall be "
2551         "included in\n"
2552         " * all copies or substantial portions of the Software.\n"
2553         " *\n"
2554         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2555         "EXPRESS OR\n"
2556         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2557         "MERCHANTABILITY,\n"
2558         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2559         "SHALL THE\n"
2560         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2561         "OTHER\n"
2562         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2563         "ARISING FROM,\n"
2564         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2565         "DEALINGS IN\n"
2566         " * THE SOFTWARE.\n"
2567         " *\n"
2568         " *===-----------------------------------------------------------------"
2569         "---"
2570         "---===\n"
2571         " */\n\n";
2572 
2573   OS << "#ifndef __ARM_FP16_H\n";
2574   OS << "#define __ARM_FP16_H\n\n";
2575 
2576   OS << "#include <stdint.h>\n\n";
2577 
2578   OS << "typedef __fp16 float16_t;\n";
2579 
2580   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2581         "__nodebug__))\n\n";
2582 
2583   SmallVector<Intrinsic *, 128> Defs;
2584   for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
2585     createIntrinsic(R, Defs);
2586 
2587   for (auto *I : Defs)
2588     I->indexBody();
2589 
2590   stable_sort(Defs, deref<std::less<>>());
2591 
2592   // Only emit a def when its requirements have been met.
2593   // FIXME: This loop could be made faster, but it's fast enough for now.
2594   bool MadeProgress = true;
2595   std::string InGuard;
2596   while (!Defs.empty() && MadeProgress) {
2597     MadeProgress = false;
2598 
2599     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2600          I != Defs.end(); /*No step*/) {
2601       bool DependenciesSatisfied = true;
2602       for (auto *II : (*I)->getDependencies()) {
2603         if (is_contained(Defs, II))
2604           DependenciesSatisfied = false;
2605       }
2606       if (!DependenciesSatisfied) {
2607         // Try the next one.
2608         ++I;
2609         continue;
2610       }
2611 
2612       // Emit #endif/#if pair if needed.
2613       if ((*I)->getArchGuard() != InGuard) {
2614         if (!InGuard.empty())
2615           OS << "#endif\n";
2616         InGuard = (*I)->getArchGuard();
2617         if (!InGuard.empty())
2618           OS << "#if " << InGuard << "\n";
2619       }
2620 
2621       // Actually generate the intrinsic code.
2622       OS << (*I)->generate();
2623 
2624       MadeProgress = true;
2625       I = Defs.erase(I);
2626     }
2627   }
2628   assert(Defs.empty() && "Some requirements were not satisfied!");
2629   if (!InGuard.empty())
2630     OS << "#endif\n";
2631 
2632   OS << "\n";
2633   OS << "#undef __ai\n\n";
2634   OS << "#endif /* __ARM_FP16_H */\n";
2635 }
2636 
runVectorTypes(raw_ostream & OS)2637 void NeonEmitter::runVectorTypes(raw_ostream &OS) {
2638   OS << "/*===---- arm_vector_types - ARM vector type "
2639         "------===\n"
2640         " *\n"
2641         " *\n"
2642         " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2643         "Exceptions.\n"
2644         " * See https://llvm.org/LICENSE.txt for license information.\n"
2645         " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2646         " *\n"
2647         " *===-----------------------------------------------------------------"
2648         "------===\n"
2649         " */\n\n";
2650   OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
2651   OS << "#error \"This file should not be used standalone. Please include"
2652         " arm_neon.h or arm_sve.h instead\"\n\n";
2653   OS << "#endif\n";
2654   OS << "#ifndef __ARM_NEON_TYPES_H\n";
2655   OS << "#define __ARM_NEON_TYPES_H\n";
2656   OS << "typedef float float32_t;\n";
2657   OS << "typedef __fp16 float16_t;\n";
2658 
2659   OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2660   OS << "typedef __mfp8 mfloat8_t;\n";
2661   OS << "typedef double float64_t;\n";
2662   OS << "#endif\n\n";
2663 
2664   OS << R"(
2665 typedef uint64_t fpm_t;
2666 
2667 enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2668 
2669 enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2670 
2671 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2672 __arm_fpm_init(void) {
2673   return 0;
2674 }
2675 
2676 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2677 __arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2678   return (__fpm & ~7ull) | (fpm_t)__format;
2679 }
2680 
2681 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2682 __arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2683   return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
2684 }
2685 
2686 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2687 __arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2688   return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
2689 }
2690 
2691 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2692 __arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2693   return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
2694 }
2695 
2696 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2697 __arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2698   return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
2699 }
2700 
2701 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2702 __arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2703   return (__fpm & ~0x7f0000ull) | (__scale << 16u);
2704 }
2705 
2706 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2707 __arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2708   return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
2709 }
2710 
2711 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2712 __arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2713   return (uint32_t)__fpm | (__scale << 32u);
2714 }
2715 
2716 )";
2717 
2718   emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
2719 
2720   emitNeonTypeDefs("bQb", OS);
2721   OS << "#endif // __ARM_NEON_TYPES_H\n";
2722 }
2723 
runBF16(raw_ostream & OS)2724 void NeonEmitter::runBF16(raw_ostream &OS) {
2725   OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
2726         "-----------------------------------===\n"
2727         " *\n"
2728         " *\n"
2729         " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2730         "Exceptions.\n"
2731         " * See https://llvm.org/LICENSE.txt for license information.\n"
2732         " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2733         " *\n"
2734         " *===-----------------------------------------------------------------"
2735         "------===\n"
2736         " */\n\n";
2737 
2738   OS << "#ifndef __ARM_BF16_H\n";
2739   OS << "#define __ARM_BF16_H\n\n";
2740 
2741   OS << "typedef __bf16 bfloat16_t;\n";
2742 
2743   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2744         "__nodebug__))\n\n";
2745 
2746   SmallVector<Intrinsic *, 128> Defs;
2747   for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
2748     createIntrinsic(R, Defs);
2749 
2750   for (auto *I : Defs)
2751     I->indexBody();
2752 
2753   stable_sort(Defs, deref<std::less<>>());
2754 
2755   // Only emit a def when its requirements have been met.
2756   // FIXME: This loop could be made faster, but it's fast enough for now.
2757   bool MadeProgress = true;
2758   std::string InGuard;
2759   while (!Defs.empty() && MadeProgress) {
2760     MadeProgress = false;
2761 
2762     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2763          I != Defs.end(); /*No step*/) {
2764       bool DependenciesSatisfied = true;
2765       for (auto *II : (*I)->getDependencies()) {
2766         if (is_contained(Defs, II))
2767           DependenciesSatisfied = false;
2768       }
2769       if (!DependenciesSatisfied) {
2770         // Try the next one.
2771         ++I;
2772         continue;
2773       }
2774 
2775       // Emit #endif/#if pair if needed.
2776       if ((*I)->getArchGuard() != InGuard) {
2777         if (!InGuard.empty())
2778           OS << "#endif\n";
2779         InGuard = (*I)->getArchGuard();
2780         if (!InGuard.empty())
2781           OS << "#if " << InGuard << "\n";
2782       }
2783 
2784       // Actually generate the intrinsic code.
2785       OS << (*I)->generate();
2786 
2787       MadeProgress = true;
2788       I = Defs.erase(I);
2789     }
2790   }
2791   assert(Defs.empty() && "Some requirements were not satisfied!");
2792   if (!InGuard.empty())
2793     OS << "#endif\n";
2794 
2795   OS << "\n";
2796   OS << "#undef __ai\n\n";
2797 
2798   OS << "#endif\n";
2799 }
2800 
EmitNeon(const RecordKeeper & Records,raw_ostream & OS)2801 void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
2802   NeonEmitter(Records).run(OS);
2803 }
2804 
EmitFP16(const RecordKeeper & Records,raw_ostream & OS)2805 void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
2806   NeonEmitter(Records).runFP16(OS);
2807 }
2808 
EmitBF16(const RecordKeeper & Records,raw_ostream & OS)2809 void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
2810   NeonEmitter(Records).runBF16(OS);
2811 }
2812 
EmitNeonSema(const RecordKeeper & Records,raw_ostream & OS)2813 void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
2814   NeonEmitter(Records).runHeader(OS);
2815 }
2816 
EmitVectorTypes(const RecordKeeper & Records,raw_ostream & OS)2817 void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
2818   NeonEmitter(Records).runVectorTypes(OS);
2819 }
2820 
EmitNeonTest(const RecordKeeper & Records,raw_ostream & OS)2821 void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
2822   llvm_unreachable("Neon test generation no longer implemented!");
2823 }
2824