xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers type metadata and calls to the llvm.type.test intrinsic.
10 // It also ensures that globals are properly laid out for the
11 // llvm.icall.branch.funnel intrinsic.
12 // See http://llvm.org/docs/TypeMetadata.html for more information.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/IPO/LowerTypeTests.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/EquivalenceClasses.h"
21 #include "llvm/ADT/PointerUnion.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/TinyPtrVector.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/PostDominators.h"
30 #include "llvm/Analysis/TargetTransformInfo.h"
31 #include "llvm/Analysis/TypeMetadataUtils.h"
32 #include "llvm/Analysis/ValueTracking.h"
33 #include "llvm/IR/Attributes.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalAlias.h"
41 #include "llvm/IR/GlobalObject.h"
42 #include "llvm/IR/GlobalValue.h"
43 #include "llvm/IR/GlobalVariable.h"
44 #include "llvm/IR/IRBuilder.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Intrinsics.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/Module.h"
53 #include "llvm/IR/ModuleSummaryIndex.h"
54 #include "llvm/IR/ModuleSummaryIndexYAML.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/PassManager.h"
57 #include "llvm/IR/ReplaceConstant.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/Use.h"
60 #include "llvm/IR/User.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/Support/Allocator.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CommandLine.h"
65 #include "llvm/Support/Debug.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/FileSystem.h"
69 #include "llvm/Support/MathExtras.h"
70 #include "llvm/Support/MemoryBuffer.h"
71 #include "llvm/Support/TrailingObjects.h"
72 #include "llvm/Support/YAMLTraits.h"
73 #include "llvm/Support/raw_ostream.h"
74 #include "llvm/TargetParser/Triple.h"
75 #include "llvm/Transforms/IPO.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/ModuleUtils.h"
78 #include <algorithm>
79 #include <cassert>
80 #include <cstdint>
81 #include <memory>
82 #include <set>
83 #include <string>
84 #include <system_error>
85 #include <utility>
86 #include <vector>
87 
88 using namespace llvm;
89 using namespace lowertypetests;
90 
91 #define DEBUG_TYPE "lowertypetests"
92 
// Pass statistics. ByteArraySizeBits, ByteArraySizeBytes are assigned in
// LowerTypeTestsModule::allocateByteArrays().
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");

// When set (the default), createBitSetTest() gives each byte array use its own
// private alias, unless an import summary is in effect.
static cl::opt<bool> AvoidReuse(
    "lowertypetests-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

// Selects what is done with the summary: nothing, import, or export.
static cl::opt<PassSummaryAction> ClSummaryAction(
    "lowertypetests-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

// Path of a YAML summary file to read before the pass runs.
static cl::opt<std::string> ClReadSummary(
    "lowertypetests-read-summary",
    cl::desc("Read summary from given YAML file before running pass"),
    cl::Hidden);

// Path of a YAML summary file to write after the pass runs.
static cl::opt<std::string> ClWriteSummary(
    "lowertypetests-write-summary",
    cl::desc("Write summary to given YAML file after running pass"),
    cl::Hidden);

// Selects which type test sequences are dropped outright instead of being
// lowered; the default is to drop none.
static cl::opt<DropTestKind>
    ClDropTypeTests("lowertypetests-drop-type-tests",
                    cl::desc("Simply drop type test sequences"),
                    cl::values(clEnumValN(DropTestKind::None, "none",
                                          "Do not drop any type tests"),
                               clEnumValN(DropTestKind::Assume, "assume",
                                          "Drop type test assume sequences"),
                               clEnumValN(DropTestKind::All, "all",
                                          "Drop all type test sequences")),
                    cl::Hidden, cl::init(DropTestKind::None));
134 
containsGlobalOffset(uint64_t Offset) const135 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
136   if (Offset < ByteOffset)
137     return false;
138 
139   if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
140     return false;
141 
142   uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
143   if (BitOffset >= BitSize)
144     return false;
145 
146   return Bits.count(BitSize - 1 - BitOffset);
147 }
148 
print(raw_ostream & OS) const149 void BitSetInfo::print(raw_ostream &OS) const {
150   OS << "offset " << ByteOffset << " size " << BitSize << " align "
151      << (1 << AlignLog2);
152 
153   if (isAllOnes()) {
154     OS << " all-ones\n";
155     return;
156   }
157 
158   OS << " { ";
159   for (uint64_t B : Bits)
160     OS << B << ' ';
161   OS << "}\n";
162 }
163 
build()164 BitSetInfo BitSetBuilder::build() {
165   if (Min > Max)
166     Min = 0;
167 
168   // Normalize each offset against the minimum observed offset, and compute
169   // the bitwise OR of each of the offsets. The number of trailing zeros
170   // in the mask gives us the log2 of the alignment of all offsets, which
171   // allows us to compress the bitset by only storing one bit per aligned
172   // address.
173   uint64_t Mask = 0;
174   for (uint64_t &Offset : Offsets) {
175     Offset -= Min;
176     Mask |= Offset;
177   }
178 
179   BitSetInfo BSI;
180   BSI.ByteOffset = Min;
181 
182   BSI.AlignLog2 = 0;
183   if (Mask != 0)
184     BSI.AlignLog2 = llvm::countr_zero(Mask);
185 
186   // Build the compressed bitset while normalizing the offsets against the
187   // computed alignment.
188   BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
189   for (uint64_t Offset : Offsets) {
190     Offset >>= BSI.AlignLog2;
191     // We invert the order of bits when adding them to the bitset. This is
192     // because the offset that we test against is computed by subtracting the
193     // address that we are testing from the global's address, which means that
194     // the offset increases as the tested address decreases.
195     BSI.Bits.insert(BSI.BitSize - 1 - Offset);
196   }
197 
198   return BSI;
199 }
200 
addFragment(const std::set<uint64_t> & F)201 void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
202   // Create a new fragment to hold the layout for F.
203   Fragments.emplace_back();
204   std::vector<uint64_t> &Fragment = Fragments.back();
205   uint64_t FragmentIndex = Fragments.size() - 1;
206 
207   for (auto ObjIndex : F) {
208     uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
209     if (OldFragmentIndex == 0) {
210       // We haven't seen this object index before, so just add it to the current
211       // fragment.
212       Fragment.push_back(ObjIndex);
213     } else {
214       // This index belongs to an existing fragment. Copy the elements of the
215       // old fragment into this one and clear the old fragment. We don't update
216       // the fragment map just yet, this ensures that any further references to
217       // indices from the old fragment in this fragment do not insert any more
218       // indices.
219       std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
220       llvm::append_range(Fragment, OldFragment);
221       OldFragment.clear();
222     }
223   }
224 
225   // Update the fragment map to point our object indices to this fragment.
226   for (uint64_t ObjIndex : Fragment)
227     FragmentMap[ObjIndex] = FragmentIndex;
228 }
229 
allocate(const std::set<uint64_t> & Bits,uint64_t BitSize,uint64_t & AllocByteOffset,uint8_t & AllocMask)230 void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
231                                 uint64_t BitSize, uint64_t &AllocByteOffset,
232                                 uint8_t &AllocMask) {
233   // Find the smallest current allocation.
234   unsigned Bit = 0;
235   for (unsigned I = 1; I != BitsPerByte; ++I)
236     if (BitAllocs[I] < BitAllocs[Bit])
237       Bit = I;
238 
239   AllocByteOffset = BitAllocs[Bit];
240 
241   // Add our size to it.
242   unsigned ReqSize = AllocByteOffset + BitSize;
243   BitAllocs[Bit] = ReqSize;
244   if (Bytes.size() < ReqSize)
245     Bytes.resize(ReqSize);
246 
247   // Set our bits.
248   AllocMask = 1 << Bit;
249   for (uint64_t B : Bits)
250     Bytes[AllocByteOffset + B] |= AllocMask;
251 }
252 
isJumpTableCanonical(Function * F)253 bool lowertypetests::isJumpTableCanonical(Function *F) {
254   if (F->isDeclarationForLinker())
255     return false;
256   auto *CI = mdconst::extract_or_null<ConstantInt>(
257       F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
258   if (!CI || !CI->isZero())
259     return true;
260   return F->hasFnAttribute("cfi-canonical-jump-table");
261 }
262 
263 namespace {
264 
265 struct ByteArrayInfo {
266   std::set<uint64_t> Bits;
267   uint64_t BitSize;
268   GlobalVariable *ByteArray;
269   GlobalVariable *MaskGlobal;
270   uint8_t *MaskPtr = nullptr;
271 };
272 
273 /// A POD-like structure that we use to store a global reference together with
274 /// its metadata types. In this pass we frequently need to query the set of
275 /// metadata types referenced by a global, which at the IR level is an expensive
276 /// operation involving a map lookup; this data structure helps to reduce the
277 /// number of times we need to do this lookup.
278 class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
279   friend TrailingObjects;
280 
281   GlobalObject *GO;
282   size_t NTypes;
283 
284   // For functions: true if the jump table is canonical. This essentially means
285   // whether the canonical address (i.e. the symbol table entry) of the function
286   // is provided by the local jump table. This is normally the same as whether
287   // the function is defined locally, but if canonical jump tables are disabled
288   // by the user then the jump table never provides a canonical definition.
289   bool IsJumpTableCanonical;
290 
291   // For functions: true if this function is either defined or used in a thinlto
292   // module and its jumptable entry needs to be exported to thinlto backends.
293   bool IsExported;
294 
295 public:
create(BumpPtrAllocator & Alloc,GlobalObject * GO,bool IsJumpTableCanonical,bool IsExported,ArrayRef<MDNode * > Types)296   static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
297                                   bool IsJumpTableCanonical, bool IsExported,
298                                   ArrayRef<MDNode *> Types) {
299     auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
300         totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
301     GTM->GO = GO;
302     GTM->NTypes = Types.size();
303     GTM->IsJumpTableCanonical = IsJumpTableCanonical;
304     GTM->IsExported = IsExported;
305     llvm::copy(Types, GTM->getTrailingObjects());
306     return GTM;
307   }
308 
getGlobal() const309   GlobalObject *getGlobal() const {
310     return GO;
311   }
312 
isJumpTableCanonical() const313   bool isJumpTableCanonical() const {
314     return IsJumpTableCanonical;
315   }
316 
isExported() const317   bool isExported() const {
318     return IsExported;
319   }
320 
types() const321   ArrayRef<MDNode *> types() const { return getTrailingObjects(NTypes); }
322 };
323 
324 struct ICallBranchFunnel final
325     : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
create__anonc00fd2a30111::ICallBranchFunnel326   static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
327                                    ArrayRef<GlobalTypeMember *> Targets,
328                                    unsigned UniqueId) {
329     auto *Call = static_cast<ICallBranchFunnel *>(
330         Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
331                        alignof(ICallBranchFunnel)));
332     Call->CI = CI;
333     Call->UniqueId = UniqueId;
334     Call->NTargets = Targets.size();
335     llvm::copy(Targets, Call->getTrailingObjects());
336     return Call;
337   }
338 
339   CallInst *CI;
targets__anonc00fd2a30111::ICallBranchFunnel340   ArrayRef<GlobalTypeMember *> targets() const {
341     return getTrailingObjects(NTargets);
342   }
343 
344   unsigned UniqueId;
345 
346 private:
347   size_t NTargets;
348 };
349 
350 struct ScopedSaveAliaseesAndUsed {
351   Module &M;
352   SmallVector<GlobalValue *, 4> Used, CompilerUsed;
353   std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
354   std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
355 
356   // This function only removes functions from llvm.used and llvm.compiler.used.
357   // We cannot remove global variables because they need to follow RAUW, as
358   // they may be deleted by buildBitSetsFromGlobalVariables.
collectAndEraseUsedFunctions__anonc00fd2a30111::ScopedSaveAliaseesAndUsed359   void collectAndEraseUsedFunctions(Module &M,
360                                     SmallVectorImpl<GlobalValue *> &Vec,
361                                     bool CompilerUsed) {
362     auto *GV = collectUsedGlobalVariables(M, Vec, CompilerUsed);
363     if (!GV)
364       return;
365     // There's no API to only remove certain array elements from
366     // llvm.used/llvm.compiler.used, so we remove all of them and add back only
367     // the non-functions.
368     GV->eraseFromParent();
369     auto NonFuncBegin =
370         std::stable_partition(Vec.begin(), Vec.end(), [](GlobalValue *GV) {
371           return isa<Function>(GV);
372         });
373     if (CompilerUsed)
374       appendToCompilerUsed(M, {NonFuncBegin, Vec.end()});
375     else
376       appendToUsed(M, {NonFuncBegin, Vec.end()});
377     Vec.resize(NonFuncBegin - Vec.begin());
378   }
379 
ScopedSaveAliaseesAndUsed__anonc00fd2a30111::ScopedSaveAliaseesAndUsed380   ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
381     // The users of this class want to replace all function references except
382     // for aliases and llvm.used/llvm.compiler.used with references to a jump
383     // table. We avoid replacing aliases in order to avoid introducing a double
384     // indirection (or an alias pointing to a declaration in ThinLTO mode), and
385     // we avoid replacing llvm.used/llvm.compiler.used because these global
386     // variables describe properties of the global, not the jump table (besides,
387     // offseted references to the jump table in llvm.used are invalid).
388     // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
389     // indirect) users", so what we do is save the list of globals referenced by
390     // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
391     // replace the aliasees and then set them back to their original values at
392     // the end.
393     collectAndEraseUsedFunctions(M, Used, false);
394     collectAndEraseUsedFunctions(M, CompilerUsed, true);
395 
396     for (auto &GA : M.aliases()) {
397       // FIXME: This should look past all aliases not just interposable ones,
398       // see discussion on D65118.
399       if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
400         FunctionAliases.push_back({&GA, F});
401     }
402 
403     for (auto &GI : M.ifuncs())
404       if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
405         ResolverIFuncs.push_back({&GI, F});
406   }
407 
~ScopedSaveAliaseesAndUsed__anonc00fd2a30111::ScopedSaveAliaseesAndUsed408   ~ScopedSaveAliaseesAndUsed() {
409     appendToUsed(M, Used);
410     appendToCompilerUsed(M, CompilerUsed);
411 
412     for (auto P : FunctionAliases)
413       P.first->setAliasee(P.second);
414 
415     for (auto P : ResolverIFuncs) {
416       // This does not preserve pointer casts that may have been stripped by the
417       // constructor, but the resolver's type is different from that of the
418       // ifunc anyway.
419       P.first->setResolver(P.second);
420     }
421   }
422 };
423 
/// Holds the state used while lowering one module: the module itself, the
/// optional export/import summaries, target information, cached IR types, and
/// the per-type-identifier bookkeeping declared below. lower() is the entry
/// point.
class LowerTypeTestsModule {
  // Module this instance operates on.
  Module &M;

  // Summaries supplied to the constructor. createBitSetTest() treats a
  // non-null ImportSummary as "importing".
  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;
  // Set when the client has invoked this to simply drop all type test assume
  // sequences.
  DropTestKind DropTypeTests;

  // Target triple components; they are populated outside this declaration.
  Triple::ArchType Arch;
  Triple::OSType OS;
  Triple::ObjectFormatType ObjectFormat;

  // Determines which kind of Thumb jump table we generate. If arch is
  // either 'arm' or 'thumb' we need to find this out, because
  // selectJumpTableArmEncoding may decide to use Thumb in either case.
  bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;

  // Cache variable used by hasBranchTargetEnforcement().
  int HasBranchTargetEnforcement = -1;

  // Frequently used IR types, created once from the module's context and data
  // layout. These initializers read M, so M must be declared before them.
  IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
  IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
  IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
  IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
  IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);

  // Indirect function call index assignment counter for WebAssembly
  uint64_t IndirectIndex = 1;

  // Mapping from type identifiers to the call sites that test them, as well as
  // whether the type identifier needs to be exported to ThinLTO backends as
  // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
  struct TypeIdUserInfo {
    std::vector<CallInst *> CallSites;
    bool IsExported = false;
  };
  DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;

  /// This structure describes how to lower type tests for a particular type
  /// identifier. It is either built directly from the global analysis (during
  /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
  /// identifier summaries and external symbol references (in ThinLTO backends).
  struct TypeIdLowering {
    TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;

    /// All except Unsat: the address of the last element within the combined
    /// global.
    Constant *OffsetedGlobal;

    /// ByteArray, Inline, AllOnes: log2 of the required global alignment
    /// relative to the start address.
    Constant *AlignLog2;

    /// ByteArray, Inline, AllOnes: one less than the size of the memory region
    /// covering members of this type identifier as a multiple of 2^AlignLog2.
    Constant *SizeM1;

    /// ByteArray: the byte array to test the address against.
    Constant *TheByteArray;

    /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
    Constant *BitMask;

    /// Inline: the bit mask to test the address against.
    Constant *InlineBits;
  };

  // Byte arrays requested through createByteArray(); allocateByteArrays()
  // assigns their final offsets and masks.
  std::vector<ByteArrayInfo> ByteArrayInfos;

  Function *WeakInitializerFn = nullptr;

  // NOTE(review): GlobalAnnotation has no in-class initializer; presumably the
  // constructor sets it -- confirm.
  GlobalVariable *GlobalAnnotation;
  // Values for which isFunctionAnnotation() returns true.
  DenseSet<Value *> FunctionAnnotations;

  bool shouldExportConstantsAsAbsoluteSymbols();
  uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
  TypeIdLowering importTypeId(StringRef TypeId);
  void importTypeTest(CallInst *CI);
  void importFunction(Function *F, bool isJumpTableCanonical,
                      std::vector<GlobalAlias *> &AliasesToErase);

  BitSetInfo
  buildBitSet(Metadata *TypeId,
              const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
  void allocateByteArrays();
  Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                          Value *BitOffset);
  void lowerTypeTestCalls(
      ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
  Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                           const TypeIdLowering &TIL);

  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Globals);
  Triple::ArchType
  selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
  bool hasBranchTargetEnforcement();
  unsigned getJumpTableEntrySize(Triple::ArchType JumpTableArch);
  InlineAsm *createJumpTableEntryAsm(Triple::ArchType JumpTableArch);
  void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
  void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
                                 ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
                                     ArrayRef<GlobalTypeMember *> Functions);
  void
  buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
                              ArrayRef<GlobalTypeMember *> Globals,
                              ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);

  void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
                                              bool IsJumpTableCanonical);
  void moveInitializerToModuleConstructor(GlobalVariable *GV);
  void findGlobalVariableUsersOf(Constant *C,
                                 SmallSetVector<GlobalVariable *, 8> &Out);

  void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions,
                       Triple::ArchType JumpTableArch);

  /// replaceCfiUses - Go through the uses list for this definition
  /// and make each use point to "V" instead of "this" when the use is outside
  /// the block. 'This's use list is expected to have at least one element.
  /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
  /// uses.
  void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);

  /// replaceDirectCalls - Go through the uses list for this definition and
  /// replace each use, which is a direct function call.
  void replaceDirectCalls(Value *Old, Value *New);

  /// Return true if \p V is recorded in FunctionAnnotations.
  bool isFunctionAnnotation(Value *V) const {
    return FunctionAnnotations.contains(V);
  }

  void maybeReplaceComdat(Function *F, StringRef OriginalName);

public:
  LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
                       ModuleSummaryIndex *ExportSummary,
                       const ModuleSummaryIndex *ImportSummary,
                       DropTestKind DropTypeTests);

  bool lower();

  // Lower the module using the action and summary passed as command line
  // arguments. For testing purposes only.
  static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
};
578 } // end anonymous namespace
579 
580 /// Build a bit set for TypeId using the object layouts in
581 /// GlobalLayout.
buildBitSet(Metadata * TypeId,const DenseMap<GlobalTypeMember *,uint64_t> & GlobalLayout)582 BitSetInfo LowerTypeTestsModule::buildBitSet(
583     Metadata *TypeId,
584     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
585   BitSetBuilder BSB;
586 
587   // Compute the byte offset of each address associated with this type
588   // identifier.
589   for (const auto &GlobalAndOffset : GlobalLayout) {
590     for (MDNode *Type : GlobalAndOffset.first->types()) {
591       if (Type->getOperand(1) != TypeId)
592         continue;
593       uint64_t Offset =
594           cast<ConstantInt>(
595               cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
596               ->getZExtValue();
597       BSB.addOffset(GlobalAndOffset.second + Offset);
598     }
599   }
600 
601   return BSB.build();
602 }
603 
604 /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
605 /// Bits. This pattern matches to the bt instruction on x86.
createMaskedBitTest(IRBuilder<> & B,Value * Bits,Value * BitOffset)606 static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
607                                   Value *BitOffset) {
608   auto BitsType = cast<IntegerType>(Bits->getType());
609   unsigned BitWidth = BitsType->getBitWidth();
610 
611   BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
612   Value *BitIndex =
613       B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
614   Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
615   Value *MaskedBits = B.CreateAnd(Bits, BitMask);
616   return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
617 }
618 
createByteArray(BitSetInfo & BSI)619 ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
620   // Create globals to stand in for byte arrays and masks. These never actually
621   // get initialized, we RAUW and erase them later in allocateByteArrays() once
622   // we know the offset and mask to use.
623   auto ByteArrayGlobal = new GlobalVariable(
624       M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
625   auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
626                                        GlobalValue::PrivateLinkage, nullptr);
627 
628   ByteArrayInfos.emplace_back();
629   ByteArrayInfo *BAI = &ByteArrayInfos.back();
630 
631   BAI->Bits = BSI.Bits;
632   BAI->BitSize = BSI.BitSize;
633   BAI->ByteArray = ByteArrayGlobal;
634   BAI->MaskGlobal = MaskGlobal;
635   return BAI;
636 }
637 
allocateByteArrays()638 void LowerTypeTestsModule::allocateByteArrays() {
639   llvm::stable_sort(ByteArrayInfos,
640                     [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
641                       return BAI1.BitSize > BAI2.BitSize;
642                     });
643 
644   std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
645 
646   ByteArrayBuilder BAB;
647   for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
648     ByteArrayInfo *BAI = &ByteArrayInfos[I];
649 
650     uint8_t Mask;
651     BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
652 
653     BAI->MaskGlobal->replaceAllUsesWith(
654         ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), PtrTy));
655     BAI->MaskGlobal->eraseFromParent();
656     if (BAI->MaskPtr)
657       *BAI->MaskPtr = Mask;
658   }
659 
660   Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
661   auto ByteArray =
662       new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
663                          GlobalValue::PrivateLinkage, ByteArrayConst);
664 
665   for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
666     ByteArrayInfo *BAI = &ByteArrayInfos[I];
667 
668     Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
669                         ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
670     Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
671         ByteArrayConst->getType(), ByteArray, Idxs);
672 
673     // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
674     // that the pc-relative displacement is folded into the lea instead of the
675     // test instruction getting another displacement.
676     GlobalAlias *Alias = GlobalAlias::create(
677         Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
678     BAI->ByteArray->replaceAllUsesWith(Alias);
679     BAI->ByteArray->eraseFromParent();
680   }
681 
682   ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
683                       BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
684                       BAB.BitAllocs[6] + BAB.BitAllocs[7];
685   ByteArraySizeBytes = BAB.Bytes.size();
686 }
687 
688 /// Build a test that bit BitOffset is set in the type identifier that was
689 /// lowered to TIL, which must be either an Inline or a ByteArray.
createBitSetTest(IRBuilder<> & B,const TypeIdLowering & TIL,Value * BitOffset)690 Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
691                                               const TypeIdLowering &TIL,
692                                               Value *BitOffset) {
693   if (TIL.TheKind == TypeTestResolution::Inline) {
694     // If the bit set is sufficiently small, we can avoid a load by bit testing
695     // a constant.
696     return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
697   } else {
698     Constant *ByteArray = TIL.TheByteArray;
699     if (AvoidReuse && !ImportSummary) {
700       // Each use of the byte array uses a different alias. This makes the
701       // backend less likely to reuse previously computed byte array addresses,
702       // improving the security of the CFI mechanism based on this pass.
703       // This won't work when importing because TheByteArray is external.
704       ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
705                                       "bits_use", ByteArray, &M);
706     }
707 
708     Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
709     Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
710 
711     Value *ByteAndMask =
712         B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
713     return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
714   }
715 }
716 
isKnownTypeIdMember(Metadata * TypeId,const DataLayout & DL,Value * V,uint64_t COffset)717 static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
718                                 Value *V, uint64_t COffset) {
719   if (auto GV = dyn_cast<GlobalObject>(V)) {
720     SmallVector<MDNode *, 2> Types;
721     GV->getMetadata(LLVMContext::MD_type, Types);
722     for (MDNode *Type : Types) {
723       if (Type->getOperand(1) != TypeId)
724         continue;
725       uint64_t Offset =
726           cast<ConstantInt>(
727               cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
728               ->getZExtValue();
729       if (COffset == Offset)
730         return true;
731     }
732     return false;
733   }
734 
735   if (auto GEP = dyn_cast<GEPOperator>(V)) {
736     APInt APOffset(DL.getIndexSizeInBits(0), 0);
737     bool Result = GEP->accumulateConstantOffset(DL, APOffset);
738     if (!Result)
739       return false;
740     COffset += APOffset.getZExtValue();
741     return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
742   }
743 
744   if (auto Op = dyn_cast<Operator>(V)) {
745     if (Op->getOpcode() == Instruction::BitCast)
746       return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
747 
748     if (Op->getOpcode() == Instruction::Select)
749       return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
750              isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
751   }
752 
753   return false;
754 }
755 
756 /// Lower a llvm.type.test call to its implementation. Returns the value to
757 /// replace the call with.
lowerTypeTestCall(Metadata * TypeId,CallInst * CI,const TypeIdLowering & TIL)758 Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
759                                                const TypeIdLowering &TIL) {
760   // Delay lowering if the resolution is currently unknown.
761   if (TIL.TheKind == TypeTestResolution::Unknown)
762     return nullptr;
763   if (TIL.TheKind == TypeTestResolution::Unsat)
764     return ConstantInt::getFalse(M.getContext());
765 
766   Value *Ptr = CI->getArgOperand(0);
767   const DataLayout &DL = M.getDataLayout();
768   if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
769     return ConstantInt::getTrue(M.getContext());
770 
771   BasicBlock *InitialBB = CI->getParent();
772 
773   IRBuilder<> B(CI);
774 
775   Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
776 
777   Constant *OffsetedGlobalAsInt =
778       ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
779   if (TIL.TheKind == TypeTestResolution::Single)
780     return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
781 
782   // Here we compute `last element - address`. The reason why we do this instead
783   // of computing `address - first element` is that it leads to a slightly
784   // shorter instruction sequence on x86. Because it doesn't matter how we do
785   // the subtraction on other architectures, we do so unconditionally.
786   Value *PtrOffset = B.CreateSub(OffsetedGlobalAsInt, PtrAsInt);
787 
788   // We need to check that the offset both falls within our range and is
789   // suitably aligned. We can check both properties at the same time by
790   // performing a right rotate by log2(alignment) followed by an integer
791   // comparison against the bitset size. The rotate will move the lower
792   // order bits that need to be zero into the higher order bits of the
793   // result, causing the comparison to fail if they are nonzero. The rotate
794   // also conveniently gives us a bit offset to use during the load from
795   // the bitset.
796   Value *BitOffset = B.CreateIntrinsic(IntPtrTy, Intrinsic::fshr,
797                                        {PtrOffset, PtrOffset, TIL.AlignLog2});
798 
799   Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
800 
801   // If the bit set is all ones, testing against it is unnecessary.
802   if (TIL.TheKind == TypeTestResolution::AllOnes)
803     return OffsetInRange;
804 
805   // See if the intrinsic is used in the following common pattern:
806   //   br(llvm.type.test(...), thenbb, elsebb)
807   // where nothing happens between the type test and the br.
808   // If so, create slightly simpler IR.
809   if (CI->hasOneUse())
810     if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
811       if (CI->getNextNode() == Br) {
812         BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
813         BasicBlock *Else = Br->getSuccessor(1);
814         BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
815         NewBr->setMetadata(LLVMContext::MD_prof,
816                            Br->getMetadata(LLVMContext::MD_prof));
817         ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
818 
819         // Update phis in Else resulting from InitialBB being split
820         for (auto &Phi : Else->phis())
821           Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);
822 
823         IRBuilder<> ThenB(CI);
824         return createBitSetTest(ThenB, TIL, BitOffset);
825       }
826 
827   IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
828 
829   // Now that we know that the offset is in range and aligned, load the
830   // appropriate bit from the bitset.
831   Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
832 
833   // The value we want is 0 if we came directly from the initial block
834   // (having failed the range or alignment checks), or the loaded bit if
835   // we came from the block in which we loaded it.
836   B.SetInsertPoint(CI);
837   PHINode *P = B.CreatePHI(Int1Ty, 2);
838   P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
839   P->addIncoming(Bit, ThenB.GetInsertBlock());
840   return P;
841 }
842 
843 /// Given a disjoint set of type identifiers and globals, lay out the globals,
844 /// build the bit sets and lower the llvm.type.test calls.
buildBitSetsFromGlobalVariables(ArrayRef<Metadata * > TypeIds,ArrayRef<GlobalTypeMember * > Globals)845 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
846     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
847   // Build a new global with the combined contents of the referenced globals.
848   // This global is a struct whose even-indexed elements contain the original
849   // contents of the referenced globals and whose odd-indexed elements contain
850   // any padding required to align the next element to the next power of 2 plus
851   // any additional padding required to meet its alignment requirements.
852   std::vector<Constant *> GlobalInits;
853   const DataLayout &DL = M.getDataLayout();
854   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
855   Align MaxAlign;
856   uint64_t CurOffset = 0;
857   uint64_t DesiredPadding = 0;
858   for (GlobalTypeMember *G : Globals) {
859     auto *GV = cast<GlobalVariable>(G->getGlobal());
860     Align Alignment =
861         DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
862     MaxAlign = std::max(MaxAlign, Alignment);
863     uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment);
864     GlobalLayout[G] = GVOffset;
865     if (GVOffset != 0) {
866       uint64_t Padding = GVOffset - CurOffset;
867       GlobalInits.push_back(
868           ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
869     }
870 
871     GlobalInits.push_back(GV->getInitializer());
872     uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
873     CurOffset = GVOffset + InitSize;
874 
875     // Compute the amount of padding that we'd like for the next element.
876     DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;
877 
878     // Experiments of different caps with Chromium on both x64 and ARM64
879     // have shown that the 32-byte cap generates the smallest binary on
880     // both platforms while different caps yield similar performance.
881     // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
882     if (DesiredPadding > 32)
883       DesiredPadding = alignTo(InitSize, 32) - InitSize;
884   }
885 
886   Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
887   auto *CombinedGlobal =
888       new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
889                          GlobalValue::PrivateLinkage, NewInit);
890   CombinedGlobal->setAlignment(MaxAlign);
891 
892   StructType *NewTy = cast<StructType>(NewInit->getType());
893   lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
894 
895   // Build aliases pointing to offsets into the combined global for each
896   // global from which we built the combined global, and replace references
897   // to the original globals with references to the aliases.
898   for (unsigned I = 0; I != Globals.size(); ++I) {
899     GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());
900 
901     // Multiply by 2 to account for padding elements.
902     Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
903                                       ConstantInt::get(Int32Ty, I * 2)};
904     Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr(
905         NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
906     assert(GV->getType()->getAddressSpace() == 0);
907     GlobalAlias *GAlias =
908         GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
909                             "", CombinedGlobalElemPtr, &M);
910     GAlias->setVisibility(GV->getVisibility());
911     GAlias->takeName(GV);
912     GV->replaceAllUsesWith(GAlias);
913     GV->eraseFromParent();
914   }
915 }
916 
shouldExportConstantsAsAbsoluteSymbols()917 bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
918   return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
919          ObjectFormat == Triple::ELF;
920 }
921 
922 /// Export the given type identifier so that ThinLTO backends may import it.
923 /// Type identifiers are exported by adding coarse-grained information about how
924 /// to test the type identifier to the summary, and creating symbols in the
925 /// object file (aliases and absolute symbols) containing fine-grained
926 /// information about the type identifier.
927 ///
928 /// Returns a pointer to the location in which to store the bitmask, if
929 /// applicable.
exportTypeId(StringRef TypeId,const TypeIdLowering & TIL)930 uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
931                                             const TypeIdLowering &TIL) {
932   TypeTestResolution &TTRes =
933       ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
934   TTRes.TheKind = TIL.TheKind;
935 
936   auto ExportGlobal = [&](StringRef Name, Constant *C) {
937     GlobalAlias *GA =
938         GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
939                             "__typeid_" + TypeId + "_" + Name, C, &M);
940     GA->setVisibility(GlobalValue::HiddenVisibility);
941   };
942 
943   auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
944     if (shouldExportConstantsAsAbsoluteSymbols())
945       ExportGlobal(Name, ConstantExpr::getIntToPtr(C, PtrTy));
946     else
947       Storage = cast<ConstantInt>(C)->getZExtValue();
948   };
949 
950   if (TIL.TheKind != TypeTestResolution::Unsat)
951     ExportGlobal("global_addr", TIL.OffsetedGlobal);
952 
953   if (TIL.TheKind == TypeTestResolution::ByteArray ||
954       TIL.TheKind == TypeTestResolution::Inline ||
955       TIL.TheKind == TypeTestResolution::AllOnes) {
956     ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
957     ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);
958 
959     uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
960     if (TIL.TheKind == TypeTestResolution::Inline)
961       TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
962     else
963       TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
964   }
965 
966   if (TIL.TheKind == TypeTestResolution::ByteArray) {
967     ExportGlobal("byte_array", TIL.TheByteArray);
968     if (shouldExportConstantsAsAbsoluteSymbols())
969       ExportGlobal("bit_mask", TIL.BitMask);
970     else
971       return &TTRes.BitMask;
972   }
973 
974   if (TIL.TheKind == TypeTestResolution::Inline)
975     ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);
976 
977   return nullptr;
978 }
979 
980 LowerTypeTestsModule::TypeIdLowering
importTypeId(StringRef TypeId)981 LowerTypeTestsModule::importTypeId(StringRef TypeId) {
982   const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
983   if (!TidSummary)
984     return {}; // Unsat: no globals match this type id.
985   const TypeTestResolution &TTRes = TidSummary->TTRes;
986 
987   TypeIdLowering TIL;
988   TIL.TheKind = TTRes.TheKind;
989 
990   auto ImportGlobal = [&](StringRef Name) {
991     // Give the global a type of length 0 so that it is not assumed not to alias
992     // with any other global.
993     GlobalVariable *GV = M.getOrInsertGlobal(
994         ("__typeid_" + TypeId + "_" + Name).str(), Int8Arr0Ty);
995     GV->setVisibility(GlobalValue::HiddenVisibility);
996     return GV;
997   };
998 
999   auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
1000                             Type *Ty) {
1001     if (!shouldExportConstantsAsAbsoluteSymbols()) {
1002       Constant *C =
1003           ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const);
1004       if (!isa<IntegerType>(Ty))
1005         C = ConstantExpr::getIntToPtr(C, Ty);
1006       return C;
1007     }
1008 
1009     Constant *C = ImportGlobal(Name);
1010     auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1011     if (isa<IntegerType>(Ty))
1012       C = ConstantExpr::getPtrToInt(C, Ty);
1013     if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
1014       return C;
1015 
1016     auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1017       auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1018       auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1019       GV->setMetadata(LLVMContext::MD_absolute_symbol,
1020                       MDNode::get(M.getContext(), {MinC, MaxC}));
1021     };
1022     if (AbsWidth == IntPtrTy->getBitWidth())
1023       SetAbsRange(~0ull, ~0ull); // Full set.
1024     else
1025       SetAbsRange(0, 1ull << AbsWidth);
1026     return C;
1027   };
1028 
1029   if (TIL.TheKind != TypeTestResolution::Unsat) {
1030     auto *GV = ImportGlobal("global_addr");
1031     // This is either a vtable (in .data.rel.ro) or a jump table (in .text).
1032     // Either way it's expected to be in the low 2 GiB, so set the small code
1033     // model.
1034     //
1035     // For .data.rel.ro, we currently place all such sections in the low 2 GiB
1036     // [1], and for .text the sections are expected to be in the low 2 GiB under
1037     // the small and medium code models [2] and this pass only supports those
1038     // code models (e.g. jump tables use jmp instead of movabs/jmp).
1039     //
1040     // [1]https://github.com/llvm/llvm-project/pull/137742
1041     // [2]https://maskray.me/blog/2023-05-14-relocation-overflow-and-code-models
1042     GV->setCodeModel(CodeModel::Small);
1043     TIL.OffsetedGlobal = GV;
1044   }
1045 
1046   if (TIL.TheKind == TypeTestResolution::ByteArray ||
1047       TIL.TheKind == TypeTestResolution::Inline ||
1048       TIL.TheKind == TypeTestResolution::AllOnes) {
1049     TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, IntPtrTy);
1050     TIL.SizeM1 =
1051         ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
1052   }
1053 
1054   if (TIL.TheKind == TypeTestResolution::ByteArray) {
1055     TIL.TheByteArray = ImportGlobal("byte_array");
1056     TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, PtrTy);
1057   }
1058 
1059   if (TIL.TheKind == TypeTestResolution::Inline)
1060     TIL.InlineBits = ImportConstant(
1061         "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
1062         TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
1063 
1064   return TIL;
1065 }
1066 
importTypeTest(CallInst * CI)1067 void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
1068   auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
1069   if (!TypeIdMDVal)
1070     report_fatal_error("Second argument of llvm.type.test must be metadata");
1071 
1072   auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
1073   // If this is a local unpromoted type, which doesn't have a metadata string,
1074   // treat as Unknown and delay lowering, so that we can still utilize it for
1075   // later optimizations.
1076   if (!TypeIdStr)
1077     return;
1078 
1079   TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
1080   Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
1081   if (Lowered) {
1082     CI->replaceAllUsesWith(Lowered);
1083     CI->eraseFromParent();
1084   }
1085 }
1086 
maybeReplaceComdat(Function * F,StringRef OriginalName)1087 void LowerTypeTestsModule::maybeReplaceComdat(Function *F,
1088                                               StringRef OriginalName) {
1089   // For COFF we should also rename the comdat if this function also
1090   // happens to be the key function. Even if the comdat name changes, this
1091   // should still be fine since comdat and symbol resolution happens
1092   // before LTO, so all symbols which would prevail have been selected.
1093   if (F->hasComdat() && ObjectFormat == Triple::COFF &&
1094       F->getComdat()->getName() == OriginalName) {
1095     Comdat *OldComdat = F->getComdat();
1096     Comdat *NewComdat = M.getOrInsertComdat(F->getName());
1097     for (GlobalObject &GO : M.global_objects()) {
1098       if (GO.getComdat() == OldComdat)
1099         GO.setComdat(NewComdat);
1100     }
1101   }
1102 }
1103 
1104 // ThinLTO backend: the function F has a jump table entry; update this module
1105 // accordingly. isJumpTableCanonical describes the type of the jump table entry.
importFunction(Function * F,bool isJumpTableCanonical,std::vector<GlobalAlias * > & AliasesToErase)1106 void LowerTypeTestsModule::importFunction(
1107     Function *F, bool isJumpTableCanonical,
1108     std::vector<GlobalAlias *> &AliasesToErase) {
1109   assert(F->getType()->getAddressSpace() == 0);
1110 
1111   GlobalValue::VisibilityTypes Visibility = F->getVisibility();
1112   std::string Name = std::string(F->getName());
1113 
1114   if (F->isDeclarationForLinker() && isJumpTableCanonical) {
1115     // Non-dso_local functions may be overriden at run time,
1116     // don't short curcuit them
1117     if (F->isDSOLocal()) {
1118       Function *RealF = Function::Create(F->getFunctionType(),
1119                                          GlobalValue::ExternalLinkage,
1120                                          F->getAddressSpace(),
1121                                          Name + ".cfi", &M);
1122       RealF->setVisibility(GlobalVariable::HiddenVisibility);
1123       replaceDirectCalls(F, RealF);
1124     }
1125     return;
1126   }
1127 
1128   Function *FDecl;
1129   if (!isJumpTableCanonical) {
1130     // Either a declaration of an external function or a reference to a locally
1131     // defined jump table.
1132     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1133                              F->getAddressSpace(), Name + ".cfi_jt", &M);
1134     FDecl->setVisibility(GlobalValue::HiddenVisibility);
1135   } else {
1136     F->setName(Name + ".cfi");
1137     maybeReplaceComdat(F, Name);
1138     F->setLinkage(GlobalValue::ExternalLinkage);
1139     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1140                              F->getAddressSpace(), Name, &M);
1141     FDecl->setVisibility(Visibility);
1142     Visibility = GlobalValue::HiddenVisibility;
1143 
1144     // Delete aliases pointing to this function, they'll be re-created in the
1145     // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
1146     // will want to reset the aliasees first.
1147     for (auto &U : F->uses()) {
1148       if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
1149         Function *AliasDecl = Function::Create(
1150             F->getFunctionType(), GlobalValue::ExternalLinkage,
1151             F->getAddressSpace(), "", &M);
1152         AliasDecl->takeName(A);
1153         A->replaceAllUsesWith(AliasDecl);
1154         AliasesToErase.push_back(A);
1155       }
1156     }
1157   }
1158 
1159   if (F->hasExternalWeakLinkage())
1160     replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
1161   else
1162     replaceCfiUses(F, FDecl, isJumpTableCanonical);
1163 
1164   // Set visibility late because it's used in replaceCfiUses() to determine
1165   // whether uses need to be replaced.
1166   F->setVisibility(Visibility);
1167 }
1168 
lowerTypeTestCalls(ArrayRef<Metadata * > TypeIds,Constant * CombinedGlobalAddr,const DenseMap<GlobalTypeMember *,uint64_t> & GlobalLayout)1169 void LowerTypeTestsModule::lowerTypeTestCalls(
1170     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
1171     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
1172   // For each type identifier in this disjoint set...
1173   for (Metadata *TypeId : TypeIds) {
1174     // Build the bitset.
1175     BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
1176     LLVM_DEBUG({
1177       if (auto MDS = dyn_cast<MDString>(TypeId))
1178         dbgs() << MDS->getString() << ": ";
1179       else
1180         dbgs() << "<unnamed>: ";
1181       BSI.print(dbgs());
1182     });
1183 
1184     ByteArrayInfo *BAI = nullptr;
1185     TypeIdLowering TIL;
1186 
1187     uint64_t GlobalOffset =
1188         BSI.ByteOffset + ((BSI.BitSize - 1) << BSI.AlignLog2);
1189     TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
1190         Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, GlobalOffset)),
1191     TIL.AlignLog2 = ConstantInt::get(IntPtrTy, BSI.AlignLog2);
1192     TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
1193     if (BSI.isAllOnes()) {
1194       TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
1195                                        : TypeTestResolution::AllOnes;
1196     } else if (BSI.BitSize <= IntPtrTy->getBitWidth()) {
1197       TIL.TheKind = TypeTestResolution::Inline;
1198       uint64_t InlineBits = 0;
1199       for (auto Bit : BSI.Bits)
1200         InlineBits |= uint64_t(1) << Bit;
1201       if (InlineBits == 0)
1202         TIL.TheKind = TypeTestResolution::Unsat;
1203       else
1204         TIL.InlineBits = ConstantInt::get(
1205             (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
1206     } else {
1207       TIL.TheKind = TypeTestResolution::ByteArray;
1208       ++NumByteArraysCreated;
1209       BAI = createByteArray(BSI);
1210       TIL.TheByteArray = BAI->ByteArray;
1211       TIL.BitMask = BAI->MaskGlobal;
1212     }
1213 
1214     TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
1215 
1216     if (TIUI.IsExported) {
1217       uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
1218       if (BAI)
1219         BAI->MaskPtr = MaskPtr;
1220     }
1221 
1222     // Lower each call to llvm.type.test for this type identifier.
1223     for (CallInst *CI : TIUI.CallSites) {
1224       ++NumTypeTestCallsLowered;
1225       Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
1226       if (Lowered) {
1227         CI->replaceAllUsesWith(Lowered);
1228         CI->eraseFromParent();
1229       }
1230     }
1231   }
1232 }
1233 
verifyTypeMDNode(GlobalObject * GO,MDNode * Type)1234 void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
1235   if (Type->getNumOperands() != 2)
1236     report_fatal_error("All operands of type metadata must have 2 elements");
1237 
1238   if (GO->isThreadLocal())
1239     report_fatal_error("Bit set element may not be thread-local");
1240   if (isa<GlobalVariable>(GO) && GO->hasSection())
1241     report_fatal_error(
1242         "A member of a type identifier may not have an explicit section");
1243 
1244   // FIXME: We previously checked that global var member of a type identifier
1245   // must be a definition, but the IR linker may leave type metadata on
1246   // declarations. We should restore this check after fixing PR31759.
1247 
1248   auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
1249   if (!OffsetConstMD)
1250     report_fatal_error("Type offset must be a constant");
1251   auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
1252   if (!OffsetInt)
1253     report_fatal_error("Type offset must be an integer constant");
1254 }
1255 
// Jump table entry sizes for each supported entry flavour.
static constexpr unsigned kX86JumpTableEntrySize = 8;
static constexpr unsigned kX86IBTJumpTableEntrySize = 16;
static constexpr unsigned kARMJumpTableEntrySize = 4;
static constexpr unsigned kARMBTIJumpTableEntrySize = 8;
static constexpr unsigned kARMv6MJumpTableEntrySize = 16;
static constexpr unsigned kRISCVJumpTableEntrySize = 8;
static constexpr unsigned kLOONGARCH64JumpTableEntrySize = 8;
1263 
hasBranchTargetEnforcement()1264 bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
1265   if (HasBranchTargetEnforcement == -1) {
1266     // First time this query has been called. Find out the answer by checking
1267     // the module flags.
1268     if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
1269           M.getModuleFlag("branch-target-enforcement")))
1270       HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
1271     else
1272       HasBranchTargetEnforcement = 0;
1273   }
1274   return HasBranchTargetEnforcement;
1275 }
1276 
1277 unsigned
getJumpTableEntrySize(Triple::ArchType JumpTableArch)1278 LowerTypeTestsModule::getJumpTableEntrySize(Triple::ArchType JumpTableArch) {
1279   switch (JumpTableArch) {
1280   case Triple::x86:
1281   case Triple::x86_64:
1282     if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1283             M.getModuleFlag("cf-protection-branch")))
1284       if (MD->getZExtValue())
1285         return kX86IBTJumpTableEntrySize;
1286     return kX86JumpTableEntrySize;
1287   case Triple::arm:
1288     return kARMJumpTableEntrySize;
1289   case Triple::thumb:
1290     if (CanUseThumbBWJumpTable) {
1291       if (hasBranchTargetEnforcement())
1292         return kARMBTIJumpTableEntrySize;
1293       return kARMJumpTableEntrySize;
1294     } else {
1295       return kARMv6MJumpTableEntrySize;
1296     }
1297   case Triple::aarch64:
1298     if (hasBranchTargetEnforcement())
1299       return kARMBTIJumpTableEntrySize;
1300     return kARMJumpTableEntrySize;
1301   case Triple::riscv32:
1302   case Triple::riscv64:
1303     return kRISCVJumpTableEntrySize;
1304   case Triple::loongarch64:
1305     return kLOONGARCH64JumpTableEntrySize;
1306   default:
1307     report_fatal_error("Unsupported architecture for jump tables");
1308   }
1309 }
1310 
1311 // Create an inline asm constant representing a jump table entry for the target.
1312 // This consists of an instruction sequence containing a relative branch to
1313 // Dest.
1314 InlineAsm *
createJumpTableEntryAsm(Triple::ArchType JumpTableArch)1315 LowerTypeTestsModule::createJumpTableEntryAsm(Triple::ArchType JumpTableArch) {
1316   std::string Asm;
1317   raw_string_ostream AsmOS(Asm);
1318 
1319   if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
1320     bool Endbr = false;
1321     if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1322             M.getModuleFlag("cf-protection-branch")))
1323       Endbr = !MD->isZero();
1324     if (Endbr)
1325       AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n");
1326     AsmOS << "jmp ${0:c}@plt\n";
1327     if (Endbr)
1328       AsmOS << ".balign 16, 0xcc\n";
1329     else
1330       AsmOS << "int3\nint3\nint3\n";
1331   } else if (JumpTableArch == Triple::arm) {
1332     AsmOS << "b $0\n";
1333   } else if (JumpTableArch == Triple::aarch64) {
1334     if (hasBranchTargetEnforcement())
1335       AsmOS << "bti c\n";
1336     AsmOS << "b $0\n";
1337   } else if (JumpTableArch == Triple::thumb) {
1338     if (!CanUseThumbBWJumpTable) {
1339       // In Armv6-M, this sequence will generate a branch without corrupting
1340       // any registers. We use two stack words; in the second, we construct the
1341       // address we'll pop into pc, and the first is used to save and restore
1342       // r0 which we use as a temporary register.
1343       //
1344       // To support position-independent use cases, the offset of the target
1345       // function is stored as a relative offset (which will expand into an
1346       // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
1347       // object file types), and added to pc after we load it. (The alternative
1348       // B.W is automatically pc-relative.)
1349       //
1350       // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
1351       // sixth halfword of padding, and then the offset consumes a further 4
1352       // bytes, for a total of 16, which is very convenient since entries in
1353       // this jump table need to have power-of-two size.
1354       AsmOS << "push {r0,r1}\n"
1355             << "ldr r0, 1f\n"
1356             << "0: add r0, r0, pc\n"
1357             << "str r0, [sp, #4]\n"
1358             << "pop {r0,pc}\n"
1359             << ".balign 4\n"
1360             << "1: .word $0 - (0b + 4)\n";
1361     } else {
1362       if (hasBranchTargetEnforcement())
1363         AsmOS << "bti\n";
1364       AsmOS << "b.w $0\n";
1365     }
1366   } else if (JumpTableArch == Triple::riscv32 ||
1367              JumpTableArch == Triple::riscv64) {
1368     AsmOS << "tail $0@plt\n";
1369   } else if (JumpTableArch == Triple::loongarch64) {
1370     AsmOS << "pcalau12i $$t0, %pc_hi20($0)\n"
1371           << "jirl $$r0, $$t0, %pc_lo12($0)\n";
1372   } else {
1373     report_fatal_error("Unsupported architecture for jump tables");
1374   }
1375 
1376   return InlineAsm::get(
1377       FunctionType::get(Type::getVoidTy(M.getContext()), PtrTy, false),
1378       AsmOS.str(), "s",
1379       /*hasSideEffects=*/true);
1380 }
1381 
1382 /// Given a disjoint set of type identifiers and functions, build the bit sets
1383 /// and lower the llvm.type.test calls, architecture dependently.
buildBitSetsFromFunctions(ArrayRef<Metadata * > TypeIds,ArrayRef<GlobalTypeMember * > Functions)1384 void LowerTypeTestsModule::buildBitSetsFromFunctions(
1385     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1386   if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
1387       Arch == Triple::thumb || Arch == Triple::aarch64 ||
1388       Arch == Triple::riscv32 || Arch == Triple::riscv64 ||
1389       Arch == Triple::loongarch64)
1390     buildBitSetsFromFunctionsNative(TypeIds, Functions);
1391   else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
1392     buildBitSetsFromFunctionsWASM(TypeIds, Functions);
1393   else
1394     report_fatal_error("Unsupported architecture for jump tables");
1395 }
1396 
moveInitializerToModuleConstructor(GlobalVariable * GV)1397 void LowerTypeTestsModule::moveInitializerToModuleConstructor(
1398     GlobalVariable *GV) {
1399   if (WeakInitializerFn == nullptr) {
1400     WeakInitializerFn = Function::Create(
1401         FunctionType::get(Type::getVoidTy(M.getContext()),
1402                           /* IsVarArg */ false),
1403         GlobalValue::InternalLinkage,
1404         M.getDataLayout().getProgramAddressSpace(),
1405         "__cfi_global_var_init", &M);
1406     BasicBlock *BB =
1407         BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
1408     ReturnInst::Create(M.getContext(), BB);
1409     WeakInitializerFn->setSection(
1410         ObjectFormat == Triple::MachO
1411             ? "__TEXT,__StaticInit,regular,pure_instructions"
1412             : ".text.startup");
1413     // This code is equivalent to relocation application, and should run at the
1414     // earliest possible time (i.e. with the highest priority).
1415     appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0);
1416   }
1417 
1418   IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
1419   GV->setConstant(false);
1420   IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign());
1421   GV->setInitializer(Constant::getNullValue(GV->getValueType()));
1422 }
1423 
findGlobalVariableUsersOf(Constant * C,SmallSetVector<GlobalVariable *,8> & Out)1424 void LowerTypeTestsModule::findGlobalVariableUsersOf(
1425     Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) {
1426   for (auto *U : C->users()){
1427     if (auto *GV = dyn_cast<GlobalVariable>(U))
1428       Out.insert(GV);
1429     else if (auto *C2 = dyn_cast<Constant>(U))
1430       findGlobalVariableUsersOf(C2, Out);
1431   }
1432 }
1433 
1434 // Replace all uses of F with (F ? JT : 0).
replaceWeakDeclarationWithJumpTablePtr(Function * F,Constant * JT,bool IsJumpTableCanonical)1435 void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
1436     Function *F, Constant *JT, bool IsJumpTableCanonical) {
1437   // The target expression can not appear in a constant initializer on most
1438   // (all?) targets. Switch to a runtime initializer.
1439   SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
1440   findGlobalVariableUsersOf(F, GlobalVarUsers);
1441   for (auto *GV : GlobalVarUsers) {
1442     if (GV == GlobalAnnotation)
1443       continue;
1444     moveInitializerToModuleConstructor(GV);
1445   }
1446 
1447   // Can not RAUW F with an expression that uses F. Replace with a temporary
1448   // placeholder first.
1449   Function *PlaceholderFn =
1450       Function::Create(cast<FunctionType>(F->getValueType()),
1451                        GlobalValue::ExternalWeakLinkage,
1452                        F->getAddressSpace(), "", &M);
1453   replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
1454 
1455   convertUsersOfConstantsToInstructions(PlaceholderFn);
1456   // Don't use range based loop, because use list will be modified.
1457   while (!PlaceholderFn->use_empty()) {
1458     Use &U = *PlaceholderFn->use_begin();
1459     auto *InsertPt = dyn_cast<Instruction>(U.getUser());
1460     assert(InsertPt && "Non-instruction users should have been eliminated");
1461     auto *PN = dyn_cast<PHINode>(InsertPt);
1462     if (PN)
1463       InsertPt = PN->getIncomingBlock(U)->getTerminator();
1464     IRBuilder Builder(InsertPt);
1465     Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F,
1466                                      Constant::getNullValue(F->getType()));
1467     Value *Select = Builder.CreateSelect(ICmp, JT,
1468                                          Constant::getNullValue(F->getType()));
1469     // For phi nodes, we need to update the incoming value for all operands
1470     // with the same predecessor.
1471     if (PN)
1472       PN->setIncomingValueForBlock(InsertPt->getParent(), Select);
1473     else
1474       U.set(Select);
1475   }
1476   PlaceholderFn->eraseFromParent();
1477 }
1478 
isThumbFunction(Function * F,Triple::ArchType ModuleArch)1479 static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
1480   Attribute TFAttr = F->getFnAttribute("target-features");
1481   if (TFAttr.isValid()) {
1482     SmallVector<StringRef, 6> Features;
1483     TFAttr.getValueAsString().split(Features, ',');
1484     for (StringRef Feature : Features) {
1485       if (Feature == "-thumb-mode")
1486         return false;
1487       else if (Feature == "+thumb-mode")
1488         return true;
1489     }
1490   }
1491 
1492   return ModuleArch == Triple::thumb;
1493 }
1494 
1495 // Each jump table must be either ARM or Thumb as a whole for the bit-test math
1496 // to work. Pick one that matches the majority of members to minimize interop
1497 // veneers inserted by the linker.
selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember * > Functions)1498 Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
1499     ArrayRef<GlobalTypeMember *> Functions) {
1500   if (Arch != Triple::arm && Arch != Triple::thumb)
1501     return Arch;
1502 
1503   if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
1504     // In architectures that provide Arm and Thumb-1 but not Thumb-2,
1505     // we should always prefer the Arm jump table format, because the
1506     // Thumb-1 one is larger and slower.
1507     return Triple::arm;
1508   }
1509 
1510   // Otherwise, go with majority vote.
1511   unsigned ArmCount = 0, ThumbCount = 0;
1512   for (const auto GTM : Functions) {
1513     if (!GTM->isJumpTableCanonical()) {
1514       // PLT stubs are always ARM.
1515       // FIXME: This is the wrong heuristic for non-canonical jump tables.
1516       ++ArmCount;
1517       continue;
1518     }
1519 
1520     Function *F = cast<Function>(GTM->getGlobal());
1521     ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
1522   }
1523 
1524   return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
1525 }
1526 
createJumpTable(Function * F,ArrayRef<GlobalTypeMember * > Functions,Triple::ArchType JumpTableArch)1527 void LowerTypeTestsModule::createJumpTable(
1528     Function *F, ArrayRef<GlobalTypeMember *> Functions,
1529     Triple::ArchType JumpTableArch) {
1530   BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
1531   IRBuilder<> IRB(BB);
1532 
1533   InlineAsm *JumpTableAsm = createJumpTableEntryAsm(JumpTableArch);
1534 
1535   // Check if all entries have the NoUnwind attribute.
1536   // If all entries have it, we can safely mark the
1537   // cfi.jumptable as NoUnwind, otherwise, direct calls
1538   // to the jump table will not handle exceptions properly
1539   bool areAllEntriesNounwind = true;
1540   for (GlobalTypeMember *GTM : Functions) {
1541     if (!llvm::cast<llvm::Function>(GTM->getGlobal())
1542              ->hasFnAttribute(llvm::Attribute::NoUnwind)) {
1543       areAllEntriesNounwind = false;
1544     }
1545     IRB.CreateCall(JumpTableAsm, GTM->getGlobal());
1546   }
1547   IRB.CreateUnreachable();
1548 
1549   // Align the whole table by entry size.
1550   F->setAlignment(Align(getJumpTableEntrySize(JumpTableArch)));
1551   // Skip prologue.
1552   // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
1553   // Luckily, this function does not get any prologue even without the
1554   // attribute.
1555   if (OS != Triple::Win32)
1556     F->addFnAttr(Attribute::Naked);
1557   if (JumpTableArch == Triple::arm)
1558     F->addFnAttr("target-features", "-thumb-mode");
1559   if (JumpTableArch == Triple::thumb) {
1560     if (hasBranchTargetEnforcement()) {
1561       // If we're generating a Thumb jump table with BTI, add a target-features
1562       // setting to ensure BTI can be assembled.
1563       F->addFnAttr("target-features", "+thumb-mode,+pacbti");
1564     } else {
1565       F->addFnAttr("target-features", "+thumb-mode");
1566       if (CanUseThumbBWJumpTable) {
1567         // Thumb jump table assembly needs Thumb2. The following attribute is
1568         // added by Clang for -march=armv7.
1569         F->addFnAttr("target-cpu", "cortex-a8");
1570       }
1571     }
1572   }
1573   // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
1574   // for the function to avoid double BTI. This is a no-op without
1575   // -mbranch-protection=.
1576   if (JumpTableArch == Triple::aarch64 || JumpTableArch == Triple::thumb) {
1577     if (F->hasFnAttribute("branch-target-enforcement"))
1578       F->removeFnAttr("branch-target-enforcement");
1579     if (F->hasFnAttribute("sign-return-address"))
1580       F->removeFnAttr("sign-return-address");
1581   }
1582   if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) {
1583     // Make sure the jump table assembly is not modified by the assembler or
1584     // the linker.
1585     F->addFnAttr("target-features", "-c,-relax");
1586   }
1587   // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR
1588   // for the function to avoid double ENDBR. This is a no-op without
1589   // -fcf-protection=.
1590   if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64)
1591     F->addFnAttr(Attribute::NoCfCheck);
1592 
1593   // Make sure we don't emit .eh_frame for this function if it isn't needed.
1594   if (areAllEntriesNounwind)
1595     F->addFnAttr(Attribute::NoUnwind);
1596 
1597   // Make sure we do not inline any calls to the cfi.jumptable.
1598   F->addFnAttr(Attribute::NoInline);
1599 }
1600 
1601 /// Given a disjoint set of type identifiers and functions, build a jump table
1602 /// for the functions, build the bit sets and lower the llvm.type.test calls.
buildBitSetsFromFunctionsNative(ArrayRef<Metadata * > TypeIds,ArrayRef<GlobalTypeMember * > Functions)1603 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
1604     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1605   // Unlike the global bitset builder, the function bitset builder cannot
1606   // re-arrange functions in a particular order and base its calculations on the
1607   // layout of the functions' entry points, as we have no idea how large a
1608   // particular function will end up being (the size could even depend on what
1609   // this pass does!) Instead, we build a jump table, which is a block of code
1610   // consisting of one branch instruction for each of the functions in the bit
1611   // set that branches to the target function, and redirect any taken function
1612   // addresses to the corresponding jump table entry. In the object file's
1613   // symbol table, the symbols for the target functions also refer to the jump
1614   // table entries, so that addresses taken outside the module will pass any
1615   // verification done inside the module.
1616   //
1617   // In more concrete terms, suppose we have three functions f, g, h which are
1618   // of the same type, and a function foo that returns their addresses:
1619   //
1620   // f:
1621   // mov 0, %eax
1622   // ret
1623   //
1624   // g:
1625   // mov 1, %eax
1626   // ret
1627   //
1628   // h:
1629   // mov 2, %eax
1630   // ret
1631   //
1632   // foo:
1633   // mov f, %eax
1634   // mov g, %edx
1635   // mov h, %ecx
1636   // ret
1637   //
1638   // We output the jump table as module-level inline asm string. The end result
1639   // will (conceptually) look like this:
1640   //
1641   // f = .cfi.jumptable
1642   // g = .cfi.jumptable + 4
1643   // h = .cfi.jumptable + 8
1644   // .cfi.jumptable:
1645   // jmp f.cfi  ; 5 bytes
1646   // int3       ; 1 byte
1647   // int3       ; 1 byte
1648   // int3       ; 1 byte
1649   // jmp g.cfi  ; 5 bytes
1650   // int3       ; 1 byte
1651   // int3       ; 1 byte
1652   // int3       ; 1 byte
1653   // jmp h.cfi  ; 5 bytes
1654   // int3       ; 1 byte
1655   // int3       ; 1 byte
1656   // int3       ; 1 byte
1657   //
1658   // f.cfi:
1659   // mov 0, %eax
1660   // ret
1661   //
1662   // g.cfi:
1663   // mov 1, %eax
1664   // ret
1665   //
1666   // h.cfi:
1667   // mov 2, %eax
1668   // ret
1669   //
1670   // foo:
1671   // mov f, %eax
1672   // mov g, %edx
1673   // mov h, %ecx
1674   // ret
1675   //
1676   // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
1677   // normal case the check can be carried out using the same kind of simple
1678   // arithmetic that we normally use for globals.
1679 
1680   // FIXME: find a better way to represent the jumptable in the IR.
1681   assert(!Functions.empty());
1682 
1683   // Decide on the jump table encoding, so that we know how big the
1684   // entries will be.
1685   Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions);
1686 
1687   // Build a simple layout based on the regular layout of jump tables.
1688   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1689   unsigned EntrySize = getJumpTableEntrySize(JumpTableArch);
1690   for (unsigned I = 0; I != Functions.size(); ++I)
1691     GlobalLayout[Functions[I]] = I * EntrySize;
1692 
1693   Function *JumpTableFn =
1694       Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
1695                                          /* IsVarArg */ false),
1696                        GlobalValue::PrivateLinkage,
1697                        M.getDataLayout().getProgramAddressSpace(),
1698                        ".cfi.jumptable", &M);
1699   ArrayType *JumpTableEntryType = ArrayType::get(Int8Ty, EntrySize);
1700   ArrayType *JumpTableType =
1701       ArrayType::get(JumpTableEntryType, Functions.size());
1702   auto JumpTable = ConstantExpr::getPointerCast(
1703       JumpTableFn, PointerType::getUnqual(M.getContext()));
1704 
1705   lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
1706 
1707   // Build aliases pointing to offsets into the jump table, and replace
1708   // references to the original functions with references to the aliases.
1709   for (unsigned I = 0; I != Functions.size(); ++I) {
1710     Function *F = cast<Function>(Functions[I]->getGlobal());
1711     bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
1712 
1713     Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr(
1714         JumpTableType, JumpTable,
1715         ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
1716                              ConstantInt::get(IntPtrTy, I)});
1717 
1718     const bool IsExported = Functions[I]->isExported();
1719     if (!IsJumpTableCanonical) {
1720       GlobalValue::LinkageTypes LT = IsExported ? GlobalValue::ExternalLinkage
1721                                                 : GlobalValue::InternalLinkage;
1722       GlobalAlias *JtAlias = GlobalAlias::create(JumpTableEntryType, 0, LT,
1723                                                  F->getName() + ".cfi_jt",
1724                                                  CombinedGlobalElemPtr, &M);
1725       if (IsExported)
1726         JtAlias->setVisibility(GlobalValue::HiddenVisibility);
1727       else
1728         appendToUsed(M, {JtAlias});
1729     }
1730 
1731     if (IsExported) {
1732       if (IsJumpTableCanonical)
1733         ExportSummary->cfiFunctionDefs().emplace(F->getName());
1734       else
1735         ExportSummary->cfiFunctionDecls().emplace(F->getName());
1736     }
1737 
1738     if (!IsJumpTableCanonical) {
1739       if (F->hasExternalWeakLinkage())
1740         replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
1741                                                IsJumpTableCanonical);
1742       else
1743         replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
1744     } else {
1745       assert(F->getType()->getAddressSpace() == 0);
1746 
1747       GlobalAlias *FAlias =
1748           GlobalAlias::create(JumpTableEntryType, 0, F->getLinkage(), "",
1749                               CombinedGlobalElemPtr, &M);
1750       FAlias->setVisibility(F->getVisibility());
1751       FAlias->takeName(F);
1752       if (FAlias->hasName()) {
1753         F->setName(FAlias->getName() + ".cfi");
1754         maybeReplaceComdat(F, FAlias->getName());
1755       }
1756       replaceCfiUses(F, FAlias, IsJumpTableCanonical);
1757       if (!F->hasLocalLinkage())
1758         F->setVisibility(GlobalVariable::HiddenVisibility);
1759     }
1760   }
1761 
1762   createJumpTable(JumpTableFn, Functions, JumpTableArch);
1763 }
1764 
1765 /// Assign a dummy layout using an incrementing counter, tag each function
1766 /// with its index represented as metadata, and lower each type test to an
1767 /// integer range comparison. During generation of the indirect function call
1768 /// table in the backend, it will assign the given indexes.
1769 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
1770 /// been finalized.
buildBitSetsFromFunctionsWASM(ArrayRef<Metadata * > TypeIds,ArrayRef<GlobalTypeMember * > Functions)1771 void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
1772     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1773   assert(!Functions.empty());
1774 
1775   // Build consecutive monotonic integer ranges for each call target set
1776   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1777 
1778   for (GlobalTypeMember *GTM : Functions) {
1779     Function *F = cast<Function>(GTM->getGlobal());
1780 
1781     // Skip functions that are not address taken, to avoid bloating the table
1782     if (!F->hasAddressTaken())
1783       continue;
1784 
1785     // Store metadata with the index for each function
1786     MDNode *MD = MDNode::get(F->getContext(),
1787                              ArrayRef<Metadata *>(ConstantAsMetadata::get(
1788                                  ConstantInt::get(Int64Ty, IndirectIndex))));
1789     F->setMetadata("wasm.index", MD);
1790 
1791     // Assign the counter value
1792     GlobalLayout[GTM] = IndirectIndex++;
1793   }
1794 
1795   // The indirect function table index space starts at zero, so pass a NULL
1796   // pointer as the subtracted "jump table" offset.
1797   lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(PtrTy),
1798                      GlobalLayout);
1799 }
1800 
buildBitSetsFromDisjointSet(ArrayRef<Metadata * > TypeIds,ArrayRef<GlobalTypeMember * > Globals,ArrayRef<ICallBranchFunnel * > ICallBranchFunnels)1801 void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
1802     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
1803     ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
1804   DenseMap<Metadata *, uint64_t> TypeIdIndices;
1805   for (unsigned I = 0; I != TypeIds.size(); ++I)
1806     TypeIdIndices[TypeIds[I]] = I;
1807 
1808   // For each type identifier, build a set of indices that refer to members of
1809   // the type identifier.
1810   std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
1811   unsigned GlobalIndex = 0;
1812   DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
1813   for (GlobalTypeMember *GTM : Globals) {
1814     for (MDNode *Type : GTM->types()) {
1815       // Type = { offset, type identifier }
1816       auto I = TypeIdIndices.find(Type->getOperand(1));
1817       if (I != TypeIdIndices.end())
1818         TypeMembers[I->second].insert(GlobalIndex);
1819     }
1820     GlobalIndices[GTM] = GlobalIndex;
1821     GlobalIndex++;
1822   }
1823 
1824   for (ICallBranchFunnel *JT : ICallBranchFunnels) {
1825     TypeMembers.emplace_back();
1826     std::set<uint64_t> &TMSet = TypeMembers.back();
1827     for (GlobalTypeMember *T : JT->targets())
1828       TMSet.insert(GlobalIndices[T]);
1829   }
1830 
1831   // Order the sets of indices by size. The GlobalLayoutBuilder works best
1832   // when given small index sets first.
1833   llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
1834                                     const std::set<uint64_t> &O2) {
1835     return O1.size() < O2.size();
1836   });
1837 
1838   // Create a GlobalLayoutBuilder and provide it with index sets as layout
1839   // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
1840   // close together as possible.
1841   GlobalLayoutBuilder GLB(Globals.size());
1842   for (auto &&MemSet : TypeMembers)
1843     GLB.addFragment(MemSet);
1844 
1845   // Build a vector of globals with the computed layout.
1846   bool IsGlobalSet =
1847       Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
1848   std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
1849   auto OGTMI = OrderedGTMs.begin();
1850   for (auto &&F : GLB.Fragments) {
1851     for (auto &&Offset : F) {
1852       if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
1853         report_fatal_error("Type identifier may not contain both global "
1854                            "variables and functions");
1855       *OGTMI++ = Globals[Offset];
1856     }
1857   }
1858 
1859   // Build the bitsets from this disjoint set.
1860   if (IsGlobalSet)
1861     buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
1862   else
1863     buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
1864 }
1865 
1866 /// Lower all type tests in this module.
LowerTypeTestsModule(Module & M,ModuleAnalysisManager & AM,ModuleSummaryIndex * ExportSummary,const ModuleSummaryIndex * ImportSummary,DropTestKind DropTypeTests)1867 LowerTypeTestsModule::LowerTypeTestsModule(
1868     Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
1869     const ModuleSummaryIndex *ImportSummary, DropTestKind DropTypeTests)
1870     : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
1871       DropTypeTests(ClDropTypeTests > DropTypeTests ? ClDropTypeTests
1872                                                     : DropTypeTests) {
1873   assert(!(ExportSummary && ImportSummary));
1874   Triple TargetTriple(M.getTargetTriple());
1875   Arch = TargetTriple.getArch();
1876   if (Arch == Triple::arm)
1877     CanUseArmJumpTable = true;
1878   if (Arch == Triple::arm || Arch == Triple::thumb) {
1879     auto &FAM =
1880         AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1881     for (Function &F : M) {
1882       // Skip declarations since we should not query the TTI for them.
1883       if (F.isDeclaration())
1884         continue;
1885       auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1886       if (TTI.hasArmWideBranch(false))
1887         CanUseArmJumpTable = true;
1888       if (TTI.hasArmWideBranch(true))
1889         CanUseThumbBWJumpTable = true;
1890     }
1891   }
1892   OS = TargetTriple.getOS();
1893   ObjectFormat = TargetTriple.getObjectFormat();
1894 
1895   // Function annotation describes or applies to function itself, and
1896   // shouldn't be associated with jump table thunk generated for CFI.
1897   GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations");
1898   if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) {
1899     const ConstantArray *CA =
1900         cast<ConstantArray>(GlobalAnnotation->getInitializer());
1901     FunctionAnnotations.insert_range(CA->operands());
1902   }
1903 }
1904 
runForTesting(Module & M,ModuleAnalysisManager & AM)1905 bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
1906   ModuleSummaryIndex Summary(/*HaveGVs=*/false);
1907 
1908   // Handle the command-line summary arguments. This code is for testing
1909   // purposes only, so we handle errors directly.
1910   if (!ClReadSummary.empty()) {
1911     ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
1912                           ": ");
1913     auto ReadSummaryFile = ExitOnErr(errorOrToExpected(
1914         MemoryBuffer::getFile(ClReadSummary, /*IsText=*/true)));
1915 
1916     yaml::Input In(ReadSummaryFile->getBuffer());
1917     In >> Summary;
1918     ExitOnErr(errorCodeToError(In.error()));
1919   }
1920 
1921   bool Changed =
1922       LowerTypeTestsModule(
1923           M, AM,
1924           ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
1925           ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
1926           /*DropTypeTests=*/DropTestKind::None)
1927           .lower();
1928 
1929   if (!ClWriteSummary.empty()) {
1930     ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
1931                           ": ");
1932     std::error_code EC;
1933     raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1934     ExitOnErr(errorCodeToError(EC));
1935 
1936     yaml::Output Out(OS);
1937     Out << Summary;
1938   }
1939 
1940   return Changed;
1941 }
1942 
isDirectCall(Use & U)1943 static bool isDirectCall(Use& U) {
1944   auto *Usr = dyn_cast<CallInst>(U.getUser());
1945   if (Usr) {
1946     auto *CB = dyn_cast<CallBase>(Usr);
1947     if (CB && CB->isCallee(&U))
1948       return true;
1949   }
1950   return false;
1951 }
1952 
replaceCfiUses(Function * Old,Value * New,bool IsJumpTableCanonical)1953 void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
1954                                           bool IsJumpTableCanonical) {
1955   SmallSetVector<Constant *, 4> Constants;
1956   for (Use &U : llvm::make_early_inc_range(Old->uses())) {
1957     // Skip no_cfi values, which refer to the function body instead of the jump
1958     // table.
1959     if (isa<NoCFIValue>(U.getUser()))
1960       continue;
1961 
1962     // Skip direct calls to externally defined or non-dso_local functions.
1963     if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
1964       continue;
1965 
1966     // Skip function annotation.
1967     if (isFunctionAnnotation(U.getUser()))
1968       continue;
1969 
1970     // Must handle Constants specially, we cannot call replaceUsesOfWith on a
1971     // constant because they are uniqued.
1972     if (auto *C = dyn_cast<Constant>(U.getUser())) {
1973       if (!isa<GlobalValue>(C)) {
1974         // Save unique users to avoid processing operand replacement
1975         // more than once.
1976         Constants.insert(C);
1977         continue;
1978       }
1979     }
1980 
1981     U.set(New);
1982   }
1983 
1984   // Process operand replacement of saved constants.
1985   for (auto *C : Constants)
1986     C->handleOperandChange(Old, New);
1987 }
1988 
replaceDirectCalls(Value * Old,Value * New)1989 void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
1990   Old->replaceUsesWithIf(New, isDirectCall);
1991 }
1992 
dropTypeTests(Module & M,Function & TypeTestFunc,bool ShouldDropAll)1993 static void dropTypeTests(Module &M, Function &TypeTestFunc,
1994                           bool ShouldDropAll) {
1995   for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) {
1996     auto *CI = cast<CallInst>(U.getUser());
1997     // Find and erase llvm.assume intrinsics for this llvm.type.test call.
1998     for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
1999       if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
2000         Assume->eraseFromParent();
2001     // If the assume was merged with another assume, we might have a use on a
2002     // phi (which will feed the assume). Simply replace the use on the phi
2003     // with "true" and leave the merged assume.
2004     //
2005     // If ShouldDropAll is set, then we  we need to update any remaining uses,
2006     // regardless of the instruction type.
2007     if (!CI->use_empty()) {
2008       assert(ShouldDropAll || all_of(CI->users(), [](User *U) -> bool {
2009                return isa<PHINode>(U);
2010              }));
2011       CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
2012     }
2013     CI->eraseFromParent();
2014   }
2015 }
2016 
lower()2017 bool LowerTypeTestsModule::lower() {
2018   Function *TypeTestFunc =
2019       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2020 
2021   if (DropTypeTests != DropTestKind::None) {
2022     bool ShouldDropAll = DropTypeTests == DropTestKind::All;
2023     if (TypeTestFunc)
2024       dropTypeTests(M, *TypeTestFunc, ShouldDropAll);
2025     // Normally we'd have already removed all @llvm.public.type.test calls,
2026     // except for in the case where we originally were performing ThinLTO but
2027     // decided not to in the backend.
2028     Function *PublicTypeTestFunc =
2029         Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
2030     if (PublicTypeTestFunc)
2031       dropTypeTests(M, *PublicTypeTestFunc, ShouldDropAll);
2032     if (TypeTestFunc || PublicTypeTestFunc) {
2033       // We have deleted the type intrinsics, so we no longer have enough
2034       // information to reason about the liveness of virtual function pointers
2035       // in GlobalDCE.
2036       for (GlobalVariable &GV : M.globals())
2037         GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2038       return true;
2039     }
2040     return false;
2041   }
2042 
2043   // If only some of the modules were split, we cannot correctly perform
2044   // this transformation. We already checked for the presense of type tests
2045   // with partially split modules during the thin link, and would have emitted
2046   // an error if any were found, so here we can simply return.
2047   if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2048       (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2049     return false;
2050 
2051   Function *ICallBranchFunnelFunc =
2052       Intrinsic::getDeclarationIfExists(&M, Intrinsic::icall_branch_funnel);
2053   if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
2054       (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
2055       !ExportSummary && !ImportSummary)
2056     return false;
2057 
2058   if (ImportSummary) {
2059     if (TypeTestFunc)
2060       for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
2061         importTypeTest(cast<CallInst>(U.getUser()));
2062 
2063     if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
2064       report_fatal_error(
2065           "unexpected call to llvm.icall.branch.funnel during import phase");
2066 
2067     SmallVector<Function *, 8> Defs;
2068     SmallVector<Function *, 8> Decls;
2069     for (auto &F : M) {
2070       // CFI functions are either external, or promoted. A local function may
2071       // have the same name, but it's not the one we are looking for.
2072       if (F.hasLocalLinkage())
2073         continue;
2074       if (ImportSummary->cfiFunctionDefs().count(F.getName()))
2075         Defs.push_back(&F);
2076       else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
2077         Decls.push_back(&F);
2078     }
2079 
2080     std::vector<GlobalAlias *> AliasesToErase;
2081     {
2082       ScopedSaveAliaseesAndUsed S(M);
2083       for (auto *F : Defs)
2084         importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
2085       for (auto *F : Decls)
2086         importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
2087     }
2088     for (GlobalAlias *GA : AliasesToErase)
2089       GA->eraseFromParent();
2090 
2091     return true;
2092   }
2093 
2094   // Equivalence class set containing type identifiers and the globals that
2095   // reference them. This is used to partition the set of type identifiers in
2096   // the module into disjoint sets.
2097   using GlobalClassesTy = EquivalenceClasses<
2098       PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
2099   GlobalClassesTy GlobalClasses;
2100 
2101   // Verify the type metadata and build a few data structures to let us
2102   // efficiently enumerate the type identifiers associated with a global:
2103   // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
2104   // of associated type metadata) and a mapping from type identifiers to their
2105   // list of GlobalTypeMembers and last observed index in the list of globals.
2106   // The indices will be used later to deterministically order the list of type
2107   // identifiers.
2108   BumpPtrAllocator Alloc;
2109   struct TIInfo {
2110     unsigned UniqueId;
2111     std::vector<GlobalTypeMember *> RefGlobals;
2112   };
2113   DenseMap<Metadata *, TIInfo> TypeIdInfo;
2114   unsigned CurUniqueId = 0;
2115   SmallVector<MDNode *, 2> Types;
2116 
2117   // Cross-DSO CFI emits jumptable entries for exported functions as well as
2118   // address taken functions in case they are address taken in other modules.
2119   const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;
2120 
2121   struct ExportedFunctionInfo {
2122     CfiFunctionLinkage Linkage;
2123     MDNode *FuncMD; // {name, linkage, type[, type...]}
2124   };
2125   MapVector<StringRef, ExportedFunctionInfo> ExportedFunctions;
2126   if (ExportSummary) {
2127     NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
2128     if (CfiFunctionsMD) {
2129       // A set of all functions that are address taken by a live global object.
2130       DenseSet<GlobalValue::GUID> AddressTaken;
2131       for (auto &I : *ExportSummary)
2132         for (auto &GVS : I.second.SummaryList)
2133           if (GVS->isLive())
2134             for (const auto &Ref : GVS->refs()) {
2135               AddressTaken.insert(Ref.getGUID());
2136               for (auto &RefGVS : Ref.getSummaryList())
2137                 if (auto Alias = dyn_cast<AliasSummary>(RefGVS.get()))
2138                   AddressTaken.insert(Alias->getAliaseeGUID());
2139             }
2140       for (auto *FuncMD : CfiFunctionsMD->operands()) {
2141         assert(FuncMD->getNumOperands() >= 2);
2142         StringRef FunctionName =
2143             cast<MDString>(FuncMD->getOperand(0))->getString();
2144         CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
2145             cast<ConstantAsMetadata>(FuncMD->getOperand(1))
2146                 ->getValue()
2147                 ->getUniqueInteger()
2148                 .getZExtValue());
2149         const GlobalValue::GUID GUID =
2150             GlobalValue::getGUIDAssumingExternalLinkage(
2151                 GlobalValue::dropLLVMManglingEscape(FunctionName));
2152         // Do not emit jumptable entries for functions that are not-live and
2153         // have no live references (and are not exported with cross-DSO CFI.)
2154         if (!ExportSummary->isGUIDLive(GUID))
2155           continue;
2156         if (!AddressTaken.count(GUID)) {
2157           if (!CrossDsoCfi || Linkage != CFL_Definition)
2158             continue;
2159 
2160           bool Exported = false;
2161           if (auto VI = ExportSummary->getValueInfo(GUID))
2162             for (const auto &GVS : VI.getSummaryList())
2163               if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
2164                 Exported = true;
2165 
2166           if (!Exported)
2167             continue;
2168         }
2169         auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
2170         if (!P.second && P.first->second.Linkage != CFL_Definition)
2171           P.first->second = {Linkage, FuncMD};
2172       }
2173 
2174       for (const auto &P : ExportedFunctions) {
2175         StringRef FunctionName = P.first;
2176         CfiFunctionLinkage Linkage = P.second.Linkage;
2177         MDNode *FuncMD = P.second.FuncMD;
2178         Function *F = M.getFunction(FunctionName);
2179         if (F && F->hasLocalLinkage()) {
2180           // Locally defined function that happens to have the same name as a
2181           // function defined in a ThinLTO module. Rename it to move it out of
2182           // the way of the external reference that we're about to create.
2183           // Note that setName will find a unique name for the function, so even
2184           // if there is an existing function with the suffix there won't be a
2185           // name collision.
2186           F->setName(F->getName() + ".1");
2187           F = nullptr;
2188         }
2189 
2190         if (!F)
2191           F = Function::Create(
2192               FunctionType::get(Type::getVoidTy(M.getContext()), false),
2193               GlobalVariable::ExternalLinkage,
2194               M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);
2195 
2196         // If the function is available_externally, remove its definition so
2197         // that it is handled the same way as a declaration. Later we will try
2198         // to create an alias using this function's linkage, which will fail if
2199         // the linkage is available_externally. This will also result in us
2200         // following the code path below to replace the type metadata.
2201         if (F->hasAvailableExternallyLinkage()) {
2202           F->setLinkage(GlobalValue::ExternalLinkage);
2203           F->deleteBody();
2204           F->setComdat(nullptr);
2205           F->clearMetadata();
2206         }
2207 
2208         // Update the linkage for extern_weak declarations when a definition
2209         // exists.
2210         if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
2211           F->setLinkage(GlobalValue::ExternalLinkage);
2212 
2213         // If the function in the full LTO module is a declaration, replace its
2214         // type metadata with the type metadata we found in cfi.functions. That
2215         // metadata is presumed to be more accurate than the metadata attached
2216         // to the declaration.
2217         if (F->isDeclaration()) {
2218           if (Linkage == CFL_WeakDeclaration)
2219             F->setLinkage(GlobalValue::ExternalWeakLinkage);
2220 
2221           F->eraseMetadata(LLVMContext::MD_type);
2222           for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
2223             F->addMetadata(LLVMContext::MD_type,
2224                            *cast<MDNode>(FuncMD->getOperand(I).get()));
2225         }
2226       }
2227     }
2228   }
2229 
2230   DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
2231   for (GlobalObject &GO : M.global_objects()) {
2232     if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
2233       continue;
2234 
2235     Types.clear();
2236     GO.getMetadata(LLVMContext::MD_type, Types);
2237 
2238     bool IsJumpTableCanonical = false;
2239     bool IsExported = false;
2240     if (Function *F = dyn_cast<Function>(&GO)) {
2241       IsJumpTableCanonical = isJumpTableCanonical(F);
2242       if (auto It = ExportedFunctions.find(F->getName());
2243           It != ExportedFunctions.end()) {
2244         IsJumpTableCanonical |= It->second.Linkage == CFL_Definition;
2245         IsExported = true;
2246       // TODO: The logic here checks only that the function is address taken,
2247       // not that the address takers are live. This can be updated to check
2248       // their liveness and emit fewer jumptable entries once monolithic LTO
2249       // builds also emit summaries.
2250       } else if (!F->hasAddressTaken()) {
2251         if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
2252           continue;
2253       }
2254     }
2255 
2256     auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
2257                                          IsExported, Types);
2258     GlobalTypeMembers[&GO] = GTM;
2259     for (MDNode *Type : Types) {
2260       verifyTypeMDNode(&GO, Type);
2261       auto &Info = TypeIdInfo[Type->getOperand(1)];
2262       Info.UniqueId = ++CurUniqueId;
2263       Info.RefGlobals.push_back(GTM);
2264     }
2265   }
2266 
2267   auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
2268     // Add the call site to the list of call sites for this type identifier. We
2269     // also use TypeIdUsers to keep track of whether we have seen this type
2270     // identifier before. If we have, we don't need to re-add the referenced
2271     // globals to the equivalence class.
2272     auto Ins = TypeIdUsers.insert({TypeId, {}});
2273     if (Ins.second) {
2274       // Add the type identifier to the equivalence class.
2275       auto &GCI = GlobalClasses.insert(TypeId);
2276       GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
2277 
2278       // Add the referenced globals to the type identifier's equivalence class.
2279       for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
2280         CurSet = GlobalClasses.unionSets(
2281             CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
2282     }
2283 
2284     return Ins.first->second;
2285   };
2286 
2287   if (TypeTestFunc) {
2288     for (const Use &U : TypeTestFunc->uses()) {
2289       auto CI = cast<CallInst>(U.getUser());
2290       // If this type test is only used by llvm.assume instructions, it
2291       // was used for whole program devirtualization, and is being kept
2292       // for use by other optimization passes. We do not need or want to
2293       // lower it here. We also don't want to rewrite any associated globals
2294       // unnecessarily. These will be removed by a subsequent LTT invocation
2295       // with the DropTypeTests flag set.
2296       bool OnlyAssumeUses = !CI->use_empty();
2297       for (const Use &CIU : CI->uses()) {
2298         if (isa<AssumeInst>(CIU.getUser()))
2299           continue;
2300         OnlyAssumeUses = false;
2301         break;
2302       }
2303       if (OnlyAssumeUses)
2304         continue;
2305 
2306       auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
2307       if (!TypeIdMDVal)
2308         report_fatal_error("Second argument of llvm.type.test must be metadata");
2309       auto TypeId = TypeIdMDVal->getMetadata();
2310       AddTypeIdUse(TypeId).CallSites.push_back(CI);
2311     }
2312   }
2313 
2314   if (ICallBranchFunnelFunc) {
2315     for (const Use &U : ICallBranchFunnelFunc->uses()) {
2316       if (Arch != Triple::x86_64)
2317         report_fatal_error(
2318             "llvm.icall.branch.funnel not supported on this target");
2319 
2320       auto CI = cast<CallInst>(U.getUser());
2321 
2322       std::vector<GlobalTypeMember *> Targets;
2323       if (CI->arg_size() % 2 != 1)
2324         report_fatal_error("number of arguments should be odd");
2325 
2326       GlobalClassesTy::member_iterator CurSet;
2327       for (unsigned I = 1; I != CI->arg_size(); I += 2) {
2328         int64_t Offset;
2329         auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
2330             CI->getOperand(I), Offset, M.getDataLayout()));
2331         if (!Base)
2332           report_fatal_error(
2333               "Expected branch funnel operand to be global value");
2334 
2335         GlobalTypeMember *GTM = GlobalTypeMembers[Base];
2336         Targets.push_back(GTM);
2337         GlobalClassesTy::member_iterator NewSet =
2338             GlobalClasses.findLeader(GlobalClasses.insert(GTM));
2339         if (I == 1)
2340           CurSet = NewSet;
2341         else
2342           CurSet = GlobalClasses.unionSets(CurSet, NewSet);
2343       }
2344 
2345       GlobalClasses.unionSets(
2346           CurSet, GlobalClasses.findLeader(
2347                       GlobalClasses.insert(ICallBranchFunnel::create(
2348                           Alloc, CI, Targets, ++CurUniqueId))));
2349     }
2350   }
2351 
2352   if (ExportSummary) {
2353     DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2354     for (auto &P : TypeIdInfo) {
2355       if (auto *TypeId = dyn_cast<MDString>(P.first))
2356         MetadataByGUID[GlobalValue::getGUIDAssumingExternalLinkage(
2357                            TypeId->getString())]
2358             .push_back(TypeId);
2359     }
2360 
2361     for (auto &P : *ExportSummary) {
2362       for (auto &S : P.second.SummaryList) {
2363         if (!ExportSummary->isGlobalValueLive(S.get()))
2364           continue;
2365         if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
2366           for (GlobalValue::GUID G : FS->type_tests())
2367             for (Metadata *MD : MetadataByGUID[G])
2368               AddTypeIdUse(MD).IsExported = true;
2369       }
2370     }
2371   }
2372 
2373   if (GlobalClasses.empty())
2374     return false;
2375 
2376   {
2377     ScopedSaveAliaseesAndUsed S(M);
2378     // For each disjoint set we found...
2379     for (const auto &C : GlobalClasses) {
2380       if (!C->isLeader())
2381         continue;
2382 
2383       ++NumTypeIdDisjointSets;
2384       // Build the list of type identifiers in this disjoint set.
2385       std::vector<Metadata *> TypeIds;
2386       std::vector<GlobalTypeMember *> Globals;
2387       std::vector<ICallBranchFunnel *> ICallBranchFunnels;
2388       for (auto M : GlobalClasses.members(*C)) {
2389         if (isa<Metadata *>(M))
2390           TypeIds.push_back(cast<Metadata *>(M));
2391         else if (isa<GlobalTypeMember *>(M))
2392           Globals.push_back(cast<GlobalTypeMember *>(M));
2393         else
2394           ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(M));
2395       }
2396 
2397       // Order type identifiers by unique ID for determinism. This ordering is
2398       // stable as there is a one-to-one mapping between metadata and unique
2399       // IDs.
2400       llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
2401         return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
2402       });
2403 
2404       // Same for the branch funnels.
2405       llvm::sort(ICallBranchFunnels,
2406                  [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
2407                    return F1->UniqueId < F2->UniqueId;
2408                  });
2409 
2410       // Build bitsets for this disjoint set.
2411       buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
2412     }
2413   }
2414 
2415   allocateByteArrays();
2416 
2417   // Parse alias data to replace stand-in function declarations for aliases
2418   // with an alias to the intended target.
2419   if (ExportSummary) {
2420     if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
2421       for (auto *AliasMD : AliasesMD->operands()) {
2422         assert(AliasMD->getNumOperands() >= 4);
2423         StringRef AliasName =
2424             cast<MDString>(AliasMD->getOperand(0))->getString();
2425         StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();
2426 
2427         if (auto It = ExportedFunctions.find(Aliasee);
2428             It == ExportedFunctions.end() ||
2429             It->second.Linkage != CFL_Definition || !M.getNamedAlias(Aliasee))
2430           continue;
2431 
2432         GlobalValue::VisibilityTypes Visibility =
2433             static_cast<GlobalValue::VisibilityTypes>(
2434                 cast<ConstantAsMetadata>(AliasMD->getOperand(2))
2435                     ->getValue()
2436                     ->getUniqueInteger()
2437                     .getZExtValue());
2438         bool Weak =
2439             static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
2440                                   ->getValue()
2441                                   ->getUniqueInteger()
2442                                   .getZExtValue());
2443 
2444         auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
2445         Alias->setVisibility(Visibility);
2446         if (Weak)
2447           Alias->setLinkage(GlobalValue::WeakAnyLinkage);
2448 
2449         if (auto *F = M.getFunction(AliasName)) {
2450           Alias->takeName(F);
2451           F->replaceAllUsesWith(Alias);
2452           F->eraseFromParent();
2453         } else {
2454           Alias->setName(AliasName);
2455         }
2456       }
2457     }
2458   }
2459 
2460   // Emit .symver directives for exported functions, if they exist.
2461   if (ExportSummary) {
2462     if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
2463       for (auto *Symver : SymversMD->operands()) {
2464         assert(Symver->getNumOperands() >= 2);
2465         StringRef SymbolName =
2466             cast<MDString>(Symver->getOperand(0))->getString();
2467         StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();
2468 
2469         if (!ExportedFunctions.count(SymbolName))
2470           continue;
2471 
2472         M.appendModuleInlineAsm(
2473             (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
2474       }
2475     }
2476   }
2477 
2478   return true;
2479 }
2480 
run(Module & M,ModuleAnalysisManager & AM)2481 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
2482                                           ModuleAnalysisManager &AM) {
2483   bool Changed;
2484   if (UseCommandLine)
2485     Changed = LowerTypeTestsModule::runForTesting(M, AM);
2486   else
2487     Changed =
2488         LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
2489             .lower();
2490   if (!Changed)
2491     return PreservedAnalyses::all();
2492   return PreservedAnalyses::none();
2493 }
2494 
run(Module & M,ModuleAnalysisManager & AM)2495 PreservedAnalyses SimplifyTypeTestsPass::run(Module &M,
2496                                              ModuleAnalysisManager &AM) {
2497   bool Changed = false;
2498   // Figure out whether inlining has exposed a constant address to a lowered
2499   // type test, and remove the test if so and the address is known to pass the
2500   // test. Unfortunately this pass ends up needing to reverse engineer what
2501   // LowerTypeTests did; this is currently inherent to the design of ThinLTO
2502   // importing where LowerTypeTests needs to run at the start.
2503   //
2504   // We look for things like:
2505   //
2506   // sub (i64 ptrtoint (ptr @_Z2fpv to i64), i64 ptrtoint (ptr
2507   // @__typeid__ZTSFvvE_global_addr to i64))
2508   //
2509   // which gets replaced with 0 if _Z2fpv (more specifically _Z2fpv.cfi, the
2510   // function referred to by the jump table) is a member of the type _ZTSFvv, as
2511   // well as things like
2512   //
2513   // icmp eq ptr @_Z2fpv, @__typeid__ZTSFvvE_global_addr
2514   //
2515   // which gets replaced with true if _Z2fpv is a member.
2516   for (auto &GV : M.globals()) {
2517     if (!GV.getName().starts_with("__typeid_") ||
2518         !GV.getName().ends_with("_global_addr"))
2519       continue;
2520     // __typeid_foo_global_addr -> foo
2521     auto *MD = MDString::get(M.getContext(),
2522                              GV.getName().substr(9, GV.getName().size() - 21));
2523     auto MaySimplifyPtr = [&](Value *Ptr) {
2524       if (auto *GV = dyn_cast<GlobalValue>(Ptr))
2525         if (auto *CFIGV = M.getNamedValue((GV->getName() + ".cfi").str()))
2526           Ptr = CFIGV;
2527       return isKnownTypeIdMember(MD, M.getDataLayout(), Ptr, 0);
2528     };
2529     auto MaySimplifyInt = [&](Value *Op) {
2530       auto *PtrAsInt = dyn_cast<ConstantExpr>(Op);
2531       if (!PtrAsInt || PtrAsInt->getOpcode() != Instruction::PtrToInt)
2532         return false;
2533       return MaySimplifyPtr(PtrAsInt->getOperand(0));
2534     };
2535     for (User *U : make_early_inc_range(GV.users())) {
2536       if (auto *CI = dyn_cast<ICmpInst>(U)) {
2537         if (CI->getPredicate() == CmpInst::ICMP_EQ &&
2538             MaySimplifyPtr(CI->getOperand(0))) {
2539           // This is an equality comparison (TypeTestResolution::Single case in
2540           // lowerTypeTestCall). In this case we just replace the comparison
2541           // with true.
2542           CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
2543           CI->eraseFromParent();
2544           Changed = true;
2545           continue;
2546         }
2547       }
2548       auto *CE = dyn_cast<ConstantExpr>(U);
2549       if (!CE || CE->getOpcode() != Instruction::PtrToInt)
2550         continue;
2551       for (Use &U : make_early_inc_range(CE->uses())) {
2552         auto *CE = dyn_cast<ConstantExpr>(U.getUser());
2553         if (U.getOperandNo() == 0 && CE &&
2554             CE->getOpcode() == Instruction::Sub &&
2555             MaySimplifyInt(CE->getOperand(1))) {
2556           // This is a computation of PtrOffset as generated by
2557           // LowerTypeTestsModule::lowerTypeTestCall above. If
2558           // isKnownTypeIdMember passes we just pretend it evaluated to 0. This
2559           // should cause later passes to remove the range and alignment checks.
2560           // The bitset checks won't be removed but those are uncommon.
2561           CE->replaceAllUsesWith(ConstantInt::get(CE->getType(), 0));
2562           Changed = true;
2563         }
2564         auto *CI = dyn_cast<ICmpInst>(U.getUser());
2565         if (U.getOperandNo() == 1 && CI &&
2566             CI->getPredicate() == CmpInst::ICMP_EQ &&
2567             MaySimplifyInt(CI->getOperand(0))) {
2568           // This is an equality comparison. Unlike in the case above it
2569           // remained as an integer compare.
2570           CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
2571           CI->eraseFromParent();
2572           Changed = true;
2573         }
2574       }
2575     }
2576   }
2577 
2578   if (!Changed)
2579     return PreservedAnalyses::all();
2580   PreservedAnalyses PA = PreservedAnalyses::none();
2581   PA.preserve<DominatorTreeAnalysis>();
2582   PA.preserve<PostDominatorTreeAnalysis>();
2583   PA.preserve<LoopAnalysis>();
2584   return PA;
2585 }
2586