1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Windows-specific. 10 // A parser for the module-definition file (.def file). 11 // 12 // The format of module-definition files are described in this document: 13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Object/COFFModuleDefinition.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Object/COFFImportFile.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace llvm::COFF; 26 using namespace llvm; 27 28 namespace llvm { 29 namespace object { 30 31 enum Kind { 32 Unknown, 33 Eof, 34 Identifier, 35 Comma, 36 Equal, 37 EqualEqual, 38 KwBase, 39 KwConstant, 40 KwData, 41 KwExports, 42 KwExportAs, 43 KwHeapsize, 44 KwLibrary, 45 KwName, 46 KwNoname, 47 KwPrivate, 48 KwStacksize, 49 KwVersion, 50 }; 51 52 struct Token { 53 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 54 Kind K; 55 StringRef Value; 56 }; 57 58 static bool isDecorated(StringRef Sym, bool MingwDef) { 59 // In def files, the symbols can either be listed decorated or undecorated. 60 // 61 // - For cdecl symbols, only the undecorated form is allowed. 62 // - For fastcall and vectorcall symbols, both fully decorated or 63 // undecorated forms can be present. 64 // - For stdcall symbols in non-MinGW environments, the decorated form is 65 // fully decorated with leading underscore and trailing stack argument 66 // size - like "_Func@0". 67 // - In MinGW def files, a decorated stdcall symbol does not include the 68 // leading underscore though, like "Func@0". 69 70 // This function controls whether a leading underscore should be added to 71 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 72 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 73 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 74 // as decorated, i.e. don't add any more leading underscores. 75 // We can't check for a leading underscore here, since function names 76 // themselves can start with an underscore, while a second one still needs 77 // to be added. 78 return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") || 79 (!MingwDef && Sym.contains('@')); 80 } 81 82 class Lexer { 83 public: 84 Lexer(StringRef S) : Buf(S) {} 85 86 Token lex() { 87 Buf = Buf.trim(); 88 if (Buf.empty()) 89 return Token(Eof); 90 91 switch (Buf[0]) { 92 case '\0': 93 return Token(Eof); 94 case ';': { 95 size_t End = Buf.find('\n'); 96 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 97 return lex(); 98 } 99 case '=': 100 Buf = Buf.drop_front(); 101 if (Buf.consume_front("=")) 102 return Token(EqualEqual, "=="); 103 return Token(Equal, "="); 104 case ',': 105 Buf = Buf.drop_front(); 106 return Token(Comma, ","); 107 case '"': { 108 StringRef S; 109 std::tie(S, Buf) = Buf.substr(1).split('"'); 110 return Token(Identifier, S); 111 } 112 default: { 113 size_t End = Buf.find_first_of("=,;\r\n \t\v"); 114 StringRef Word = Buf.substr(0, End); 115 Kind K = llvm::StringSwitch<Kind>(Word) 116 .Case("BASE", KwBase) 117 .Case("CONSTANT", KwConstant) 118 .Case("DATA", KwData) 119 .Case("EXPORTS", KwExports) 120 .Case("EXPORTAS", KwExportAs) 121 .Case("HEAPSIZE", KwHeapsize) 122 .Case("LIBRARY", KwLibrary) 123 .Case("NAME", KwName) 124 .Case("NONAME", KwNoname) 125 .Case("PRIVATE", KwPrivate) 126 .Case("STACKSIZE", KwStacksize) 127 .Case("VERSION", KwVersion) 128 .Default(Identifier); 129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 130 return Token(K, Word); 131 } 132 } 133 } 134 135 private: 136 StringRef Buf; 137 }; 138 139 class Parser { 140 public: 141 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU) 142 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) { 143 if (Machine != IMAGE_FILE_MACHINE_I386) 144 AddUnderscores = false; 145 } 146 147 Expected<COFFModuleDefinition> parse() { 148 do { 149 if (Error Err = parseOne()) 150 return std::move(Err); 151 } while (Tok.K != Eof); 152 return Info; 153 } 154 155 private: 156 void read() { 157 if (Stack.empty()) { 158 Tok = Lex.lex(); 159 return; 160 } 161 Tok = Stack.back(); 162 Stack.pop_back(); 163 } 164 165 Error readAsInt(uint64_t *I) { 166 read(); 167 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 168 return createError("integer expected"); 169 return Error::success(); 170 } 171 172 Error expect(Kind Expected, StringRef Msg) { 173 read(); 174 if (Tok.K != Expected) 175 return createError(Msg); 176 return Error::success(); 177 } 178 179 void unget() { Stack.push_back(Tok); } 180 181 Error parseOne() { 182 read(); 183 switch (Tok.K) { 184 case Eof: 185 return Error::success(); 186 case KwExports: 187 for (;;) { 188 read(); 189 if (Tok.K != Identifier) { 190 unget(); 191 return Error::success(); 192 } 193 if (Error Err = parseExport()) 194 return Err; 195 } 196 case KwHeapsize: 197 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 198 case KwStacksize: 199 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 200 case KwLibrary: 201 case KwName: { 202 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 203 std::string Name; 204 if (Error Err = parseName(&Name, &Info.ImageBase)) 205 return Err; 206 207 Info.ImportName = Name; 208 209 // Set the output file, but don't override /out if it was already passed. 210 if (Info.OutputFile.empty()) { 211 Info.OutputFile = Name; 212 // Append the appropriate file extension if not already present. 213 if (!sys::path::has_extension(Name)) 214 Info.OutputFile += IsDll ? ".dll" : ".exe"; 215 } 216 217 return Error::success(); 218 } 219 case KwVersion: 220 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 221 default: 222 return createError("unknown directive: " + Tok.Value); 223 } 224 } 225 226 Error parseExport() { 227 COFFShortExport E; 228 E.Name = std::string(Tok.Value); 229 read(); 230 if (Tok.K == Equal) { 231 read(); 232 if (Tok.K != Identifier) 233 return createError("identifier expected, but got " + Tok.Value); 234 E.ExtName = E.Name; 235 E.Name = std::string(Tok.Value); 236 } else { 237 unget(); 238 } 239 240 if (AddUnderscores) { 241 // Don't add underscore if the name is already mangled or if it's a 242 // forward target. 243 if (!isDecorated(E.Name, MingwDef) && 244 (E.ExtName.empty() || !StringRef(E.Name).contains("."))) 245 E.Name = (std::string("_").append(E.Name)); 246 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 247 E.ExtName = (std::string("_").append(E.ExtName)); 248 } 249 250 for (;;) { 251 read(); 252 if (Tok.K == Identifier && Tok.Value[0] == '@') { 253 if (Tok.Value == "@") { 254 // "foo @ 10" 255 read(); 256 Tok.Value.getAsInteger(10, E.Ordinal); 257 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 258 // "foo \n @bar" - Not an ordinal modifier at all, but the next 259 // export (fastcall decorated) - complete the current one. 260 unget(); 261 Info.Exports.push_back(E); 262 return Error::success(); 263 } 264 // "foo @10" 265 read(); 266 if (Tok.K == KwNoname) { 267 E.Noname = true; 268 } else { 269 unget(); 270 } 271 continue; 272 } 273 if (Tok.K == KwData) { 274 E.Data = true; 275 continue; 276 } 277 if (Tok.K == KwConstant) { 278 E.Constant = true; 279 continue; 280 } 281 if (Tok.K == KwPrivate) { 282 E.Private = true; 283 continue; 284 } 285 if (Tok.K == EqualEqual) { 286 read(); 287 E.ImportName = std::string(Tok.Value); 288 continue; 289 } 290 // EXPORTAS must be at the end of export definition 291 if (Tok.K == KwExportAs) { 292 read(); 293 if (Tok.K == Eof) 294 return createError( 295 "unexpected end of file, EXPORTAS identifier expected"); 296 E.ExportAs = std::string(Tok.Value); 297 } else { 298 unget(); 299 } 300 Info.Exports.push_back(E); 301 return Error::success(); 302 } 303 } 304 305 // HEAPSIZE/STACKSIZE reserve[,commit] 306 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 307 if (Error Err = readAsInt(Reserve)) 308 return Err; 309 read(); 310 if (Tok.K != Comma) { 311 unget(); 312 Commit = nullptr; 313 return Error::success(); 314 } 315 if (Error Err = readAsInt(Commit)) 316 return Err; 317 return Error::success(); 318 } 319 320 // NAME outputPath [BASE=address] 321 Error parseName(std::string *Out, uint64_t *Baseaddr) { 322 read(); 323 if (Tok.K == Identifier) { 324 *Out = std::string(Tok.Value); 325 } else { 326 *Out = ""; 327 unget(); 328 return Error::success(); 329 } 330 read(); 331 if (Tok.K == KwBase) { 332 if (Error Err = expect(Equal, "'=' expected")) 333 return Err; 334 if (Error Err = readAsInt(Baseaddr)) 335 return Err; 336 } else { 337 unget(); 338 *Baseaddr = 0; 339 } 340 return Error::success(); 341 } 342 343 // VERSION major[.minor] 344 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 345 read(); 346 if (Tok.K != Identifier) 347 return createError("identifier expected, but got " + Tok.Value); 348 StringRef V1, V2; 349 std::tie(V1, V2) = Tok.Value.split('.'); 350 if (V1.getAsInteger(10, *Major)) 351 return createError("integer expected, but got " + Tok.Value); 352 if (V2.empty()) 353 *Minor = 0; 354 else if (V2.getAsInteger(10, *Minor)) 355 return createError("integer expected, but got " + Tok.Value); 356 return Error::success(); 357 } 358 359 Lexer Lex; 360 Token Tok; 361 std::vector<Token> Stack; 362 MachineTypes Machine; 363 COFFModuleDefinition Info; 364 bool MingwDef; 365 bool AddUnderscores; 366 }; 367 368 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 369 MachineTypes Machine, 370 bool MingwDef, 371 bool AddUnderscores) { 372 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse(); 373 } 374 375 } // namespace object 376 } // namespace llvm 377