1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Windows-specific. 10 // A parser for the module-definition file (.def file). 11 // 12 // The format of module-definition files are described in this document: 13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Object/COFFModuleDefinition.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Object/COFFImportFile.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace llvm::COFF; 26 using namespace llvm; 27 28 namespace llvm { 29 namespace object { 30 31 enum Kind { 32 Unknown, 33 Eof, 34 Identifier, 35 Comma, 36 Equal, 37 EqualEqual, 38 KwBase, 39 KwConstant, 40 KwData, 41 KwExports, 42 KwHeapsize, 43 KwLibrary, 44 KwName, 45 KwNoname, 46 KwPrivate, 47 KwStacksize, 48 KwVersion, 49 }; 50 51 struct Token { 52 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 53 Kind K; 54 StringRef Value; 55 }; 56 57 static bool isDecorated(StringRef Sym, bool MingwDef) { 58 // In def files, the symbols can either be listed decorated or undecorated. 59 // 60 // - For cdecl symbols, only the undecorated form is allowed. 61 // - For fastcall and vectorcall symbols, both fully decorated or 62 // undecorated forms can be present. 63 // - For stdcall symbols in non-MinGW environments, the decorated form is 64 // fully decorated with leading underscore and trailing stack argument 65 // size - like "_Func@0". 66 // - In MinGW def files, a decorated stdcall symbol does not include the 67 // leading underscore though, like "Func@0". 68 69 // This function controls whether a leading underscore should be added to 70 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 71 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 72 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 73 // as decorated, i.e. don't add any more leading underscores. 74 // We can't check for a leading underscore here, since function names 75 // themselves can start with an underscore, while a second one still needs 76 // to be added. 77 return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") || 78 (!MingwDef && Sym.contains('@')); 79 } 80 81 class Lexer { 82 public: 83 Lexer(StringRef S) : Buf(S) {} 84 85 Token lex() { 86 Buf = Buf.trim(); 87 if (Buf.empty()) 88 return Token(Eof); 89 90 switch (Buf[0]) { 91 case '\0': 92 return Token(Eof); 93 case ';': { 94 size_t End = Buf.find('\n'); 95 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 96 return lex(); 97 } 98 case '=': 99 Buf = Buf.drop_front(); 100 if (Buf.starts_with("=")) { 101 Buf = Buf.drop_front(); 102 return Token(EqualEqual, "=="); 103 } 104 return Token(Equal, "="); 105 case ',': 106 Buf = Buf.drop_front(); 107 return Token(Comma, ","); 108 case '"': { 109 StringRef S; 110 std::tie(S, Buf) = Buf.substr(1).split('"'); 111 return Token(Identifier, S); 112 } 113 default: { 114 size_t End = Buf.find_first_of("=,;\r\n \t\v"); 115 StringRef Word = Buf.substr(0, End); 116 Kind K = llvm::StringSwitch<Kind>(Word) 117 .Case("BASE", KwBase) 118 .Case("CONSTANT", KwConstant) 119 .Case("DATA", KwData) 120 .Case("EXPORTS", KwExports) 121 .Case("HEAPSIZE", KwHeapsize) 122 .Case("LIBRARY", KwLibrary) 123 .Case("NAME", KwName) 124 .Case("NONAME", KwNoname) 125 .Case("PRIVATE", KwPrivate) 126 .Case("STACKSIZE", KwStacksize) 127 .Case("VERSION", KwVersion) 128 .Default(Identifier); 129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 130 return Token(K, Word); 131 } 132 } 133 } 134 135 private: 136 StringRef Buf; 137 }; 138 139 class Parser { 140 public: 141 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU) 142 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) { 143 if (Machine != IMAGE_FILE_MACHINE_I386) 144 AddUnderscores = false; 145 } 146 147 Expected<COFFModuleDefinition> parse() { 148 do { 149 if (Error Err = parseOne()) 150 return std::move(Err); 151 } while (Tok.K != Eof); 152 return Info; 153 } 154 155 private: 156 void read() { 157 if (Stack.empty()) { 158 Tok = Lex.lex(); 159 return; 160 } 161 Tok = Stack.back(); 162 Stack.pop_back(); 163 } 164 165 Error readAsInt(uint64_t *I) { 166 read(); 167 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 168 return createError("integer expected"); 169 return Error::success(); 170 } 171 172 Error expect(Kind Expected, StringRef Msg) { 173 read(); 174 if (Tok.K != Expected) 175 return createError(Msg); 176 return Error::success(); 177 } 178 179 void unget() { Stack.push_back(Tok); } 180 181 Error parseOne() { 182 read(); 183 switch (Tok.K) { 184 case Eof: 185 return Error::success(); 186 case KwExports: 187 for (;;) { 188 read(); 189 if (Tok.K != Identifier) { 190 unget(); 191 return Error::success(); 192 } 193 if (Error Err = parseExport()) 194 return Err; 195 } 196 case KwHeapsize: 197 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 198 case KwStacksize: 199 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 200 case KwLibrary: 201 case KwName: { 202 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 203 std::string Name; 204 if (Error Err = parseName(&Name, &Info.ImageBase)) 205 return Err; 206 207 Info.ImportName = Name; 208 209 // Set the output file, but don't override /out if it was already passed. 210 if (Info.OutputFile.empty()) { 211 Info.OutputFile = Name; 212 // Append the appropriate file extension if not already present. 213 if (!sys::path::has_extension(Name)) 214 Info.OutputFile += IsDll ? ".dll" : ".exe"; 215 } 216 217 return Error::success(); 218 } 219 case KwVersion: 220 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 221 default: 222 return createError("unknown directive: " + Tok.Value); 223 } 224 } 225 226 Error parseExport() { 227 COFFShortExport E; 228 E.Name = std::string(Tok.Value); 229 read(); 230 if (Tok.K == Equal) { 231 read(); 232 if (Tok.K != Identifier) 233 return createError("identifier expected, but got " + Tok.Value); 234 E.ExtName = E.Name; 235 E.Name = std::string(Tok.Value); 236 } else { 237 unget(); 238 } 239 240 if (AddUnderscores) { 241 if (!isDecorated(E.Name, MingwDef)) 242 E.Name = (std::string("_").append(E.Name)); 243 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 244 E.ExtName = (std::string("_").append(E.ExtName)); 245 } 246 247 for (;;) { 248 read(); 249 if (Tok.K == Identifier && Tok.Value[0] == '@') { 250 if (Tok.Value == "@") { 251 // "foo @ 10" 252 read(); 253 Tok.Value.getAsInteger(10, E.Ordinal); 254 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 255 // "foo \n @bar" - Not an ordinal modifier at all, but the next 256 // export (fastcall decorated) - complete the current one. 257 unget(); 258 Info.Exports.push_back(E); 259 return Error::success(); 260 } 261 // "foo @10" 262 read(); 263 if (Tok.K == KwNoname) { 264 E.Noname = true; 265 } else { 266 unget(); 267 } 268 continue; 269 } 270 if (Tok.K == KwData) { 271 E.Data = true; 272 continue; 273 } 274 if (Tok.K == KwConstant) { 275 E.Constant = true; 276 continue; 277 } 278 if (Tok.K == KwPrivate) { 279 E.Private = true; 280 continue; 281 } 282 if (Tok.K == EqualEqual) { 283 read(); 284 E.AliasTarget = std::string(Tok.Value); 285 if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef)) 286 E.AliasTarget = std::string("_").append(E.AliasTarget); 287 continue; 288 } 289 unget(); 290 Info.Exports.push_back(E); 291 return Error::success(); 292 } 293 } 294 295 // HEAPSIZE/STACKSIZE reserve[,commit] 296 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 297 if (Error Err = readAsInt(Reserve)) 298 return Err; 299 read(); 300 if (Tok.K != Comma) { 301 unget(); 302 Commit = nullptr; 303 return Error::success(); 304 } 305 if (Error Err = readAsInt(Commit)) 306 return Err; 307 return Error::success(); 308 } 309 310 // NAME outputPath [BASE=address] 311 Error parseName(std::string *Out, uint64_t *Baseaddr) { 312 read(); 313 if (Tok.K == Identifier) { 314 *Out = std::string(Tok.Value); 315 } else { 316 *Out = ""; 317 unget(); 318 return Error::success(); 319 } 320 read(); 321 if (Tok.K == KwBase) { 322 if (Error Err = expect(Equal, "'=' expected")) 323 return Err; 324 if (Error Err = readAsInt(Baseaddr)) 325 return Err; 326 } else { 327 unget(); 328 *Baseaddr = 0; 329 } 330 return Error::success(); 331 } 332 333 // VERSION major[.minor] 334 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 335 read(); 336 if (Tok.K != Identifier) 337 return createError("identifier expected, but got " + Tok.Value); 338 StringRef V1, V2; 339 std::tie(V1, V2) = Tok.Value.split('.'); 340 if (V1.getAsInteger(10, *Major)) 341 return createError("integer expected, but got " + Tok.Value); 342 if (V2.empty()) 343 *Minor = 0; 344 else if (V2.getAsInteger(10, *Minor)) 345 return createError("integer expected, but got " + Tok.Value); 346 return Error::success(); 347 } 348 349 Lexer Lex; 350 Token Tok; 351 std::vector<Token> Stack; 352 MachineTypes Machine; 353 COFFModuleDefinition Info; 354 bool MingwDef; 355 bool AddUnderscores; 356 }; 357 358 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 359 MachineTypes Machine, 360 bool MingwDef, 361 bool AddUnderscores) { 362 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse(); 363 } 364 365 } // namespace object 366 } // namespace llvm 367