1 //===- StringTableBuilder.cpp - String table building utility -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/MC/StringTableBuilder.h" 10 #include "llvm/ADT/CachedHashString.h" 11 #include "llvm/ADT/SmallString.h" 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/BinaryFormat/COFF.h" 14 #include "llvm/Support/Endian.h" 15 #include "llvm/Support/MathExtras.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cassert> 18 #include <cstddef> 19 #include <cstdint> 20 #include <cstring> 21 #include <utility> 22 #include <vector> 23 24 using namespace llvm; 25 26 StringTableBuilder::~StringTableBuilder() = default; 27 28 void StringTableBuilder::initSize() { 29 // Account for leading bytes in table so that offsets returned from add are 30 // correct. 31 switch (K) { 32 case RAW: 33 case DWARF: 34 Size = 0; 35 break; 36 case MachOLinked: 37 case MachO64Linked: 38 Size = 2; 39 break; 40 case MachO: 41 case MachO64: 42 case ELF: 43 // Start the table with a NUL byte. 44 Size = 1; 45 break; 46 case XCOFF: 47 case WinCOFF: 48 // Make room to write the table size later. 49 Size = 4; 50 break; 51 } 52 } 53 54 StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) 55 : K(K), Alignment(Alignment) { 56 initSize(); 57 } 58 59 void StringTableBuilder::write(raw_ostream &OS) const { 60 assert(isFinalized()); 61 SmallString<0> Data; 62 Data.resize(getSize()); 63 write((uint8_t *)Data.data()); 64 OS << Data; 65 } 66 67 using StringPair = std::pair<CachedHashStringRef, size_t>; 68 69 void StringTableBuilder::write(uint8_t *Buf) const { 70 assert(isFinalized()); 71 for (const StringPair &P : StringIndexMap) { 72 StringRef Data = P.first.val(); 73 if (!Data.empty()) 74 memcpy(Buf + P.second, Data.data(), Data.size()); 75 } 76 // The COFF formats store the size of the string table in the first 4 bytes. 77 // For Windows, the format is little-endian; for AIX, it is big-endian. 78 if (K == WinCOFF) 79 support::endian::write32le(Buf, Size); 80 else if (K == XCOFF) 81 support::endian::write32be(Buf, Size); 82 } 83 84 // Returns the character at Pos from end of a string. 85 static int charTailAt(StringPair *P, size_t Pos) { 86 StringRef S = P->first.val(); 87 if (Pos >= S.size()) 88 return -1; 89 return (unsigned char)S[S.size() - Pos - 1]; 90 } 91 92 // Three-way radix quicksort. This is much faster than std::sort with strcmp 93 // because it does not compare characters that we already know the same. 94 static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { 95 tailcall: 96 if (Vec.size() <= 1) 97 return; 98 99 // Partition items so that items in [0, I) are greater than the pivot, 100 // [I, J) are the same as the pivot, and [J, Vec.size()) are less than 101 // the pivot. 102 int Pivot = charTailAt(Vec[0], Pos); 103 size_t I = 0; 104 size_t J = Vec.size(); 105 for (size_t K = 1; K < J;) { 106 int C = charTailAt(Vec[K], Pos); 107 if (C > Pivot) 108 std::swap(Vec[I++], Vec[K++]); 109 else if (C < Pivot) 110 std::swap(Vec[--J], Vec[K]); 111 else 112 K++; 113 } 114 115 multikeySort(Vec.slice(0, I), Pos); 116 multikeySort(Vec.slice(J), Pos); 117 118 // multikeySort(Vec.slice(I, J - I), Pos + 1), but with 119 // tail call optimization. 120 if (Pivot != -1) { 121 Vec = Vec.slice(I, J - I); 122 ++Pos; 123 goto tailcall; 124 } 125 } 126 127 void StringTableBuilder::finalize() { 128 assert(K != DWARF); 129 finalizeStringTable(/*Optimize=*/true); 130 } 131 132 void StringTableBuilder::finalizeInOrder() { 133 finalizeStringTable(/*Optimize=*/false); 134 } 135 136 void StringTableBuilder::finalizeStringTable(bool Optimize) { 137 Finalized = true; 138 139 if (Optimize) { 140 std::vector<StringPair *> Strings; 141 Strings.reserve(StringIndexMap.size()); 142 for (StringPair &P : StringIndexMap) 143 Strings.push_back(&P); 144 145 multikeySort(Strings, 0); 146 initSize(); 147 148 StringRef Previous; 149 for (StringPair *P : Strings) { 150 StringRef S = P->first.val(); 151 if (Previous.endswith(S)) { 152 size_t Pos = Size - S.size() - (K != RAW); 153 if (!(Pos & (Alignment - 1))) { 154 P->second = Pos; 155 continue; 156 } 157 } 158 159 Size = alignTo(Size, Alignment); 160 P->second = Size; 161 162 Size += S.size(); 163 if (K != RAW) 164 ++Size; 165 Previous = S; 166 } 167 } 168 169 if (K == MachO || K == MachOLinked) 170 Size = alignTo(Size, 4); // Pad to multiple of 4. 171 if (K == MachO64 || K == MachO64Linked) 172 Size = alignTo(Size, 8); // Pad to multiple of 8. 173 174 // According to ld64 the string table of a final linked Mach-O binary starts 175 // with " ", i.e. the first byte is ' ' and the second byte is zero. In 176 // 'initSize()' we reserved the first two bytes for holding this string. 177 if (K == MachOLinked || K == MachO64Linked) 178 StringIndexMap[CachedHashStringRef(" ")] = 0; 179 180 // The first byte in an ELF string table must be null, according to the ELF 181 // specification. In 'initSize()' we reserved the first byte to hold null for 182 // this purpose and here we actually add the string to allow 'getOffset()' to 183 // be called on an empty string. 184 if (K == ELF) 185 StringIndexMap[CachedHashStringRef("")] = 0; 186 } 187 188 void StringTableBuilder::clear() { 189 Finalized = false; 190 StringIndexMap.clear(); 191 } 192 193 size_t StringTableBuilder::getOffset(CachedHashStringRef S) const { 194 assert(isFinalized()); 195 auto I = StringIndexMap.find(S); 196 assert(I != StringIndexMap.end() && "String is not in table!"); 197 return I->second; 198 } 199 200 size_t StringTableBuilder::add(CachedHashStringRef S) { 201 if (K == WinCOFF) 202 assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); 203 204 assert(!isFinalized()); 205 auto P = StringIndexMap.insert(std::make_pair(S, 0)); 206 if (P.second) { 207 size_t Start = alignTo(Size, Alignment); 208 P.first->second = Start; 209 Size = Start + S.size() + (K != RAW); 210 } 211 return P.first->second; 212 } 213