1 //===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 //===----------------------------------------------------------------------===// 10 11 #include "AArch64.h" 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/IR/Attributes.h" 14 #include "llvm/IR/Constants.h" 15 #include "llvm/IR/GlobalValue.h" 16 #include "llvm/IR/GlobalVariable.h" 17 #include "llvm/IR/IRBuilder.h" 18 #include "llvm/IR/Module.h" 19 #include "llvm/Pass.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 #include <set> 24 25 using namespace llvm; 26 27 static const Align kTagGranuleSize = Align(16); 28 29 static bool shouldTagGlobal(GlobalVariable &G) { 30 if (!G.isTagged()) 31 return false; 32 33 assert(G.hasSanitizerMetadata() && 34 "Missing sanitizer metadata, but symbol is apparently tagged."); 35 GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata(); 36 37 // For now, don't instrument constant data, as it'll be in .rodata anyway. It 38 // may be worth instrumenting these in future to stop them from being used as 39 // gadgets. 40 if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant()) { 41 Meta.Memtag = false; 42 G.setSanitizerMetadata(Meta); 43 return false; 44 } 45 46 // Globals can be placed implicitly or explicitly in sections. There's two 47 // different types of globals that meet this criteria that cause problems: 48 // 1. Function pointers that are going into various init arrays (either 49 // explicitly through `__attribute__((section(<foo>)))` or implicitly 50 // through `__attribute__((constructor)))`, such as ".(pre)init(_array)", 51 // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up 52 // overaligned and overpadded, making iterating over them problematic, and 53 // each function pointer is individually tagged (so the iteration over 54 // them causes SIGSEGV/MTE[AS]ERR). 55 // 2. Global variables put into an explicit section, where the section's name 56 // is a valid C-style identifier. The linker emits a `__start_<name>` and 57 // `__stop_<na,e>` symbol for the section, so that you can iterate over 58 // globals within this section. Unfortunately, again, these globals would 59 // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR. 60 // 61 // To mitigate both these cases, and because specifying a section is rare 62 // outside of these two cases, disable MTE protection for globals in any 63 // section. 64 if (G.hasSection()) { 65 Meta.Memtag = false; 66 G.setSanitizerMetadata(Meta); 67 return false; 68 } 69 70 return true; 71 } 72 73 // Technically, due to ELF symbol interposition semantics, we can't change the 74 // alignment or size of symbols. If we increase the alignment or size of a 75 // symbol, the compiler may make optimisations based on this new alignment or 76 // size. If the symbol is interposed, this optimisation could lead to 77 // alignment-related or OOB read/write crashes. 78 // 79 // This is handled in the linker. When the linker sees multiple declarations of 80 // a global variable, and some are tagged, and some are untagged, it resolves it 81 // to be an untagged definition - but preserves the tag-granule-rounded size and 82 // tag-granule-alignment. This should prevent these kind of crashes intra-DSO. 83 // For cross-DSO, it's been a reasonable contract that if you're interposing a 84 // sanitizer-instrumented global, then the interposer also needs to be 85 // sanitizer-instrumented. 86 // 87 // FIXME: In theory, this can be fixed by splitting the size/alignment of 88 // globals into two uses: an "output alignment" that's emitted to the ELF file, 89 // and an "optimisation alignment" that's used for optimisation. Thus, we could 90 // adjust the output alignment only, and still optimise based on the pessimistic 91 // pre-tagging size/alignment. 92 static void tagGlobalDefinition(Module &M, GlobalVariable *G) { 93 Constant *Initializer = G->getInitializer(); 94 uint64_t SizeInBytes = 95 M.getDataLayout().getTypeAllocSize(Initializer->getType()); 96 97 uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize); 98 if (SizeInBytes != NewSize) { 99 // Pad the initializer out to the next multiple of 16 bytes. 100 llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0); 101 Constant *Padding = ConstantDataArray::get(M.getContext(), Init); 102 Initializer = ConstantStruct::getAnon({Initializer, Padding}); 103 auto *NewGV = new GlobalVariable( 104 M, Initializer->getType(), G->isConstant(), G->getLinkage(), 105 Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace()); 106 NewGV->copyAttributesFrom(G); 107 NewGV->setComdat(G->getComdat()); 108 NewGV->copyMetadata(G, 0); 109 110 NewGV->takeName(G); 111 G->replaceAllUsesWith(NewGV); 112 G->eraseFromParent(); 113 G = NewGV; 114 } 115 116 G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize)); 117 118 // Ensure that tagged globals don't get merged by ICF - as they should have 119 // different tags at runtime. 120 G->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 121 } 122 123 namespace { 124 class AArch64GlobalsTagging : public ModulePass { 125 public: 126 static char ID; 127 128 explicit AArch64GlobalsTagging() : ModulePass(ID) { 129 initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry()); 130 } 131 132 bool runOnModule(Module &M) override; 133 134 StringRef getPassName() const override { return "AArch64 Globals Tagging"; } 135 136 private: 137 std::set<GlobalVariable *> GlobalsToTag; 138 }; 139 } // anonymous namespace 140 141 char AArch64GlobalsTagging::ID = 0; 142 143 bool AArch64GlobalsTagging::runOnModule(Module &M) { 144 // No mutating the globals in-place, or iterator invalidation occurs. 145 std::vector<GlobalVariable *> GlobalsToTag; 146 for (GlobalVariable &G : M.globals()) { 147 if (G.isDeclaration() || !shouldTagGlobal(G)) 148 continue; 149 GlobalsToTag.push_back(&G); 150 } 151 152 for (GlobalVariable *G : GlobalsToTag) { 153 tagGlobalDefinition(M, G); 154 } 155 156 return true; 157 } 158 159 INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging", 160 "AArch64 Globals Tagging Pass", false, false) 161 INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging", 162 "AArch64 Globals Tagging Pass", false, false) 163 164 ModulePass *llvm::createAArch64GlobalsTaggingPass() { 165 return new AArch64GlobalsTagging(); 166 } 167