1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Windows-specific. 10 // A parser for the module-definition file (.def file). 11 // 12 // The format of module-definition files are described in this document: 13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Object/COFFModuleDefinition.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Object/COFFImportFile.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace llvm::COFF; 26 using namespace llvm; 27 28 namespace llvm { 29 namespace object { 30 31 enum Kind { 32 Unknown, 33 Eof, 34 Identifier, 35 Comma, 36 Equal, 37 EqualEqual, 38 KwBase, 39 KwConstant, 40 KwData, 41 KwExports, 42 KwHeapsize, 43 KwLibrary, 44 KwName, 45 KwNoname, 46 KwPrivate, 47 KwStacksize, 48 KwVersion, 49 }; 50 51 struct Token { 52 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 53 Kind K; 54 StringRef Value; 55 }; 56 57 static bool isDecorated(StringRef Sym, bool MingwDef) { 58 // In def files, the symbols can either be listed decorated or undecorated. 59 // 60 // - For cdecl symbols, only the undecorated form is allowed. 61 // - For fastcall and vectorcall symbols, both fully decorated or 62 // undecorated forms can be present. 63 // - For stdcall symbols in non-MinGW environments, the decorated form is 64 // fully decorated with leading underscore and trailing stack argument 65 // size - like "_Func@0". 66 // - In MinGW def files, a decorated stdcall symbol does not include the 67 // leading underscore though, like "Func@0". 68 69 // This function controls whether a leading underscore should be added to 70 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 71 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 72 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 73 // as decorated, i.e. don't add any more leading underscores. 74 // We can't check for a leading underscore here, since function names 75 // themselves can start with an underscore, while a second one still needs 76 // to be added. 77 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || 78 (!MingwDef && Sym.contains('@')); 79 } 80 81 class Lexer { 82 public: 83 Lexer(StringRef S) : Buf(S) {} 84 85 Token lex() { 86 Buf = Buf.trim(); 87 if (Buf.empty()) 88 return Token(Eof); 89 90 switch (Buf[0]) { 91 case '\0': 92 return Token(Eof); 93 case ';': { 94 size_t End = Buf.find('\n'); 95 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 96 return lex(); 97 } 98 case '=': 99 Buf = Buf.drop_front(); 100 if (Buf.startswith("=")) { 101 Buf = Buf.drop_front(); 102 return Token(EqualEqual, "=="); 103 } 104 return Token(Equal, "="); 105 case ',': 106 Buf = Buf.drop_front(); 107 return Token(Comma, ","); 108 case '"': { 109 StringRef S; 110 std::tie(S, Buf) = Buf.substr(1).split('"'); 111 return Token(Identifier, S); 112 } 113 default: { 114 size_t End = Buf.find_first_of("=,;\r\n \t\v"); 115 StringRef Word = Buf.substr(0, End); 116 Kind K = llvm::StringSwitch<Kind>(Word) 117 .Case("BASE", KwBase) 118 .Case("CONSTANT", KwConstant) 119 .Case("DATA", KwData) 120 .Case("EXPORTS", KwExports) 121 .Case("HEAPSIZE", KwHeapsize) 122 .Case("LIBRARY", KwLibrary) 123 .Case("NAME", KwName) 124 .Case("NONAME", KwNoname) 125 .Case("PRIVATE", KwPrivate) 126 .Case("STACKSIZE", KwStacksize) 127 .Case("VERSION", KwVersion) 128 .Default(Identifier); 129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 130 return Token(K, Word); 131 } 132 } 133 } 134 135 private: 136 StringRef Buf; 137 }; 138 139 class Parser { 140 public: 141 explicit Parser(StringRef S, MachineTypes M, bool B) 142 : Lex(S), Machine(M), MingwDef(B) {} 143 144 Expected<COFFModuleDefinition> parse() { 145 do { 146 if (Error Err = parseOne()) 147 return std::move(Err); 148 } while (Tok.K != Eof); 149 return Info; 150 } 151 152 private: 153 void read() { 154 if (Stack.empty()) { 155 Tok = Lex.lex(); 156 return; 157 } 158 Tok = Stack.back(); 159 Stack.pop_back(); 160 } 161 162 Error readAsInt(uint64_t *I) { 163 read(); 164 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 165 return createError("integer expected"); 166 return Error::success(); 167 } 168 169 Error expect(Kind Expected, StringRef Msg) { 170 read(); 171 if (Tok.K != Expected) 172 return createError(Msg); 173 return Error::success(); 174 } 175 176 void unget() { Stack.push_back(Tok); } 177 178 Error parseOne() { 179 read(); 180 switch (Tok.K) { 181 case Eof: 182 return Error::success(); 183 case KwExports: 184 for (;;) { 185 read(); 186 if (Tok.K != Identifier) { 187 unget(); 188 return Error::success(); 189 } 190 if (Error Err = parseExport()) 191 return Err; 192 } 193 case KwHeapsize: 194 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 195 case KwStacksize: 196 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 197 case KwLibrary: 198 case KwName: { 199 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 200 std::string Name; 201 if (Error Err = parseName(&Name, &Info.ImageBase)) 202 return Err; 203 204 Info.ImportName = Name; 205 206 // Set the output file, but don't override /out if it was already passed. 207 if (Info.OutputFile.empty()) { 208 Info.OutputFile = Name; 209 // Append the appropriate file extension if not already present. 210 if (!sys::path::has_extension(Name)) 211 Info.OutputFile += IsDll ? ".dll" : ".exe"; 212 } 213 214 return Error::success(); 215 } 216 case KwVersion: 217 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 218 default: 219 return createError("unknown directive: " + Tok.Value); 220 } 221 } 222 223 Error parseExport() { 224 COFFShortExport E; 225 E.Name = std::string(Tok.Value); 226 read(); 227 if (Tok.K == Equal) { 228 read(); 229 if (Tok.K != Identifier) 230 return createError("identifier expected, but got " + Tok.Value); 231 E.ExtName = E.Name; 232 E.Name = std::string(Tok.Value); 233 } else { 234 unget(); 235 } 236 237 if (Machine == IMAGE_FILE_MACHINE_I386) { 238 if (!isDecorated(E.Name, MingwDef)) 239 E.Name = (std::string("_").append(E.Name)); 240 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 241 E.ExtName = (std::string("_").append(E.ExtName)); 242 } 243 244 for (;;) { 245 read(); 246 if (Tok.K == Identifier && Tok.Value[0] == '@') { 247 if (Tok.Value == "@") { 248 // "foo @ 10" 249 read(); 250 Tok.Value.getAsInteger(10, E.Ordinal); 251 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 252 // "foo \n @bar" - Not an ordinal modifier at all, but the next 253 // export (fastcall decorated) - complete the current one. 254 unget(); 255 Info.Exports.push_back(E); 256 return Error::success(); 257 } 258 // "foo @10" 259 read(); 260 if (Tok.K == KwNoname) { 261 E.Noname = true; 262 } else { 263 unget(); 264 } 265 continue; 266 } 267 if (Tok.K == KwData) { 268 E.Data = true; 269 continue; 270 } 271 if (Tok.K == KwConstant) { 272 E.Constant = true; 273 continue; 274 } 275 if (Tok.K == KwPrivate) { 276 E.Private = true; 277 continue; 278 } 279 if (Tok.K == EqualEqual) { 280 read(); 281 E.AliasTarget = std::string(Tok.Value); 282 if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef)) 283 E.AliasTarget = std::string("_").append(E.AliasTarget); 284 continue; 285 } 286 unget(); 287 Info.Exports.push_back(E); 288 return Error::success(); 289 } 290 } 291 292 // HEAPSIZE/STACKSIZE reserve[,commit] 293 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 294 if (Error Err = readAsInt(Reserve)) 295 return Err; 296 read(); 297 if (Tok.K != Comma) { 298 unget(); 299 Commit = nullptr; 300 return Error::success(); 301 } 302 if (Error Err = readAsInt(Commit)) 303 return Err; 304 return Error::success(); 305 } 306 307 // NAME outputPath [BASE=address] 308 Error parseName(std::string *Out, uint64_t *Baseaddr) { 309 read(); 310 if (Tok.K == Identifier) { 311 *Out = std::string(Tok.Value); 312 } else { 313 *Out = ""; 314 unget(); 315 return Error::success(); 316 } 317 read(); 318 if (Tok.K == KwBase) { 319 if (Error Err = expect(Equal, "'=' expected")) 320 return Err; 321 if (Error Err = readAsInt(Baseaddr)) 322 return Err; 323 } else { 324 unget(); 325 *Baseaddr = 0; 326 } 327 return Error::success(); 328 } 329 330 // VERSION major[.minor] 331 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 332 read(); 333 if (Tok.K != Identifier) 334 return createError("identifier expected, but got " + Tok.Value); 335 StringRef V1, V2; 336 std::tie(V1, V2) = Tok.Value.split('.'); 337 if (V1.getAsInteger(10, *Major)) 338 return createError("integer expected, but got " + Tok.Value); 339 if (V2.empty()) 340 *Minor = 0; 341 else if (V2.getAsInteger(10, *Minor)) 342 return createError("integer expected, but got " + Tok.Value); 343 return Error::success(); 344 } 345 346 Lexer Lex; 347 Token Tok; 348 std::vector<Token> Stack; 349 MachineTypes Machine; 350 COFFModuleDefinition Info; 351 bool MingwDef; 352 }; 353 354 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 355 MachineTypes Machine, 356 bool MingwDef) { 357 return Parser(MB.getBuffer(), Machine, MingwDef).parse(); 358 } 359 360 } // namespace object 361 } // namespace llvm 362