1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Windows-specific. 10 // A parser for the module-definition file (.def file). 11 // 12 // The format of module-definition files are described in this document: 13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Object/COFFModuleDefinition.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Object/COFFImportFile.h" 22 #include "llvm/Object/Error.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm::COFF; 28 using namespace llvm; 29 30 namespace llvm { 31 namespace object { 32 33 enum Kind { 34 Unknown, 35 Eof, 36 Identifier, 37 Comma, 38 Equal, 39 EqualEqual, 40 KwBase, 41 KwConstant, 42 KwData, 43 KwExports, 44 KwHeapsize, 45 KwLibrary, 46 KwName, 47 KwNoname, 48 KwPrivate, 49 KwStacksize, 50 KwVersion, 51 }; 52 53 struct Token { 54 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 55 Kind K; 56 StringRef Value; 57 }; 58 59 static bool isDecorated(StringRef Sym, bool MingwDef) { 60 // In def files, the symbols can either be listed decorated or undecorated. 61 // 62 // - For cdecl symbols, only the undecorated form is allowed. 63 // - For fastcall and vectorcall symbols, both fully decorated or 64 // undecorated forms can be present. 65 // - For stdcall symbols in non-MinGW environments, the decorated form is 66 // fully decorated with leading underscore and trailing stack argument 67 // size - like "_Func@0". 68 // - In MinGW def files, a decorated stdcall symbol does not include the 69 // leading underscore though, like "Func@0". 70 71 // This function controls whether a leading underscore should be added to 72 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 73 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 74 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 75 // as decorated, i.e. don't add any more leading underscores. 76 // We can't check for a leading underscore here, since function names 77 // themselves can start with an underscore, while a second one still needs 78 // to be added. 79 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || 80 (!MingwDef && Sym.contains('@')); 81 } 82 83 class Lexer { 84 public: 85 Lexer(StringRef S) : Buf(S) {} 86 87 Token lex() { 88 Buf = Buf.trim(); 89 if (Buf.empty()) 90 return Token(Eof); 91 92 switch (Buf[0]) { 93 case '\0': 94 return Token(Eof); 95 case ';': { 96 size_t End = Buf.find('\n'); 97 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 98 return lex(); 99 } 100 case '=': 101 Buf = Buf.drop_front(); 102 if (Buf.startswith("=")) { 103 Buf = Buf.drop_front(); 104 return Token(EqualEqual, "=="); 105 } 106 return Token(Equal, "="); 107 case ',': 108 Buf = Buf.drop_front(); 109 return Token(Comma, ","); 110 case '"': { 111 StringRef S; 112 std::tie(S, Buf) = Buf.substr(1).split('"'); 113 return Token(Identifier, S); 114 } 115 default: { 116 size_t End = Buf.find_first_of("=,;\r\n \t\v"); 117 StringRef Word = Buf.substr(0, End); 118 Kind K = llvm::StringSwitch<Kind>(Word) 119 .Case("BASE", KwBase) 120 .Case("CONSTANT", KwConstant) 121 .Case("DATA", KwData) 122 .Case("EXPORTS", KwExports) 123 .Case("HEAPSIZE", KwHeapsize) 124 .Case("LIBRARY", KwLibrary) 125 .Case("NAME", KwName) 126 .Case("NONAME", KwNoname) 127 .Case("PRIVATE", KwPrivate) 128 .Case("STACKSIZE", KwStacksize) 129 .Case("VERSION", KwVersion) 130 .Default(Identifier); 131 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 132 return Token(K, Word); 133 } 134 } 135 } 136 137 private: 138 StringRef Buf; 139 }; 140 141 class Parser { 142 public: 143 explicit Parser(StringRef S, MachineTypes M, bool B) 144 : Lex(S), Machine(M), MingwDef(B) {} 145 146 Expected<COFFModuleDefinition> parse() { 147 do { 148 if (Error Err = parseOne()) 149 return std::move(Err); 150 } while (Tok.K != Eof); 151 return Info; 152 } 153 154 private: 155 void read() { 156 if (Stack.empty()) { 157 Tok = Lex.lex(); 158 return; 159 } 160 Tok = Stack.back(); 161 Stack.pop_back(); 162 } 163 164 Error readAsInt(uint64_t *I) { 165 read(); 166 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 167 return createError("integer expected"); 168 return Error::success(); 169 } 170 171 Error expect(Kind Expected, StringRef Msg) { 172 read(); 173 if (Tok.K != Expected) 174 return createError(Msg); 175 return Error::success(); 176 } 177 178 void unget() { Stack.push_back(Tok); } 179 180 Error parseOne() { 181 read(); 182 switch (Tok.K) { 183 case Eof: 184 return Error::success(); 185 case KwExports: 186 for (;;) { 187 read(); 188 if (Tok.K != Identifier) { 189 unget(); 190 return Error::success(); 191 } 192 if (Error Err = parseExport()) 193 return Err; 194 } 195 case KwHeapsize: 196 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 197 case KwStacksize: 198 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 199 case KwLibrary: 200 case KwName: { 201 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 202 std::string Name; 203 if (Error Err = parseName(&Name, &Info.ImageBase)) 204 return Err; 205 206 Info.ImportName = Name; 207 208 // Set the output file, but don't override /out if it was already passed. 209 if (Info.OutputFile.empty()) { 210 Info.OutputFile = Name; 211 // Append the appropriate file extension if not already present. 212 if (!sys::path::has_extension(Name)) 213 Info.OutputFile += IsDll ? ".dll" : ".exe"; 214 } 215 216 return Error::success(); 217 } 218 case KwVersion: 219 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 220 default: 221 return createError("unknown directive: " + Tok.Value); 222 } 223 } 224 225 Error parseExport() { 226 COFFShortExport E; 227 E.Name = std::string(Tok.Value); 228 read(); 229 if (Tok.K == Equal) { 230 read(); 231 if (Tok.K != Identifier) 232 return createError("identifier expected, but got " + Tok.Value); 233 E.ExtName = E.Name; 234 E.Name = std::string(Tok.Value); 235 } else { 236 unget(); 237 } 238 239 if (Machine == IMAGE_FILE_MACHINE_I386) { 240 if (!isDecorated(E.Name, MingwDef)) 241 E.Name = (std::string("_").append(E.Name)); 242 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 243 E.ExtName = (std::string("_").append(E.ExtName)); 244 } 245 246 for (;;) { 247 read(); 248 if (Tok.K == Identifier && Tok.Value[0] == '@') { 249 if (Tok.Value == "@") { 250 // "foo @ 10" 251 read(); 252 Tok.Value.getAsInteger(10, E.Ordinal); 253 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 254 // "foo \n @bar" - Not an ordinal modifier at all, but the next 255 // export (fastcall decorated) - complete the current one. 256 unget(); 257 Info.Exports.push_back(E); 258 return Error::success(); 259 } 260 // "foo @10" 261 read(); 262 if (Tok.K == KwNoname) { 263 E.Noname = true; 264 } else { 265 unget(); 266 } 267 continue; 268 } 269 if (Tok.K == KwData) { 270 E.Data = true; 271 continue; 272 } 273 if (Tok.K == KwConstant) { 274 E.Constant = true; 275 continue; 276 } 277 if (Tok.K == KwPrivate) { 278 E.Private = true; 279 continue; 280 } 281 if (Tok.K == EqualEqual) { 282 read(); 283 E.AliasTarget = std::string(Tok.Value); 284 if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef)) 285 E.AliasTarget = std::string("_").append(E.AliasTarget); 286 continue; 287 } 288 unget(); 289 Info.Exports.push_back(E); 290 return Error::success(); 291 } 292 } 293 294 // HEAPSIZE/STACKSIZE reserve[,commit] 295 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 296 if (Error Err = readAsInt(Reserve)) 297 return Err; 298 read(); 299 if (Tok.K != Comma) { 300 unget(); 301 Commit = nullptr; 302 return Error::success(); 303 } 304 if (Error Err = readAsInt(Commit)) 305 return Err; 306 return Error::success(); 307 } 308 309 // NAME outputPath [BASE=address] 310 Error parseName(std::string *Out, uint64_t *Baseaddr) { 311 read(); 312 if (Tok.K == Identifier) { 313 *Out = std::string(Tok.Value); 314 } else { 315 *Out = ""; 316 unget(); 317 return Error::success(); 318 } 319 read(); 320 if (Tok.K == KwBase) { 321 if (Error Err = expect(Equal, "'=' expected")) 322 return Err; 323 if (Error Err = readAsInt(Baseaddr)) 324 return Err; 325 } else { 326 unget(); 327 *Baseaddr = 0; 328 } 329 return Error::success(); 330 } 331 332 // VERSION major[.minor] 333 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 334 read(); 335 if (Tok.K != Identifier) 336 return createError("identifier expected, but got " + Tok.Value); 337 StringRef V1, V2; 338 std::tie(V1, V2) = Tok.Value.split('.'); 339 if (V1.getAsInteger(10, *Major)) 340 return createError("integer expected, but got " + Tok.Value); 341 if (V2.empty()) 342 *Minor = 0; 343 else if (V2.getAsInteger(10, *Minor)) 344 return createError("integer expected, but got " + Tok.Value); 345 return Error::success(); 346 } 347 348 Lexer Lex; 349 Token Tok; 350 std::vector<Token> Stack; 351 MachineTypes Machine; 352 COFFModuleDefinition Info; 353 bool MingwDef; 354 }; 355 356 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 357 MachineTypes Machine, 358 bool MingwDef) { 359 return Parser(MB.getBuffer(), Machine, MingwDef).parse(); 360 } 361 362 } // namespace object 363 } // namespace llvm 364