xref: /freebsd/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
/// Token categories produced by the Lexer and consumed by the Parser.
enum Kind {
  // Bookkeeping kinds.
  Unknown, // Default kind of a default-constructed Token.
  Eof,     // End of the input buffer.

  // Generic lexical elements.
  Identifier, // Any bare word that is not a keyword, or a quoted string.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords; each matches the upper-case spelling noted on the right.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwExportAs,  // EXPORTAS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
51 
52 struct Token {
53   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54   Kind K;
55   StringRef Value;
56 };
57 
58 static bool isDecorated(StringRef Sym, bool MingwDef) {
59   // In def files, the symbols can either be listed decorated or undecorated.
60   //
61   // - For cdecl symbols, only the undecorated form is allowed.
62   // - For fastcall and vectorcall symbols, both fully decorated or
63   //   undecorated forms can be present.
64   // - For stdcall symbols in non-MinGW environments, the decorated form is
65   //   fully decorated with leading underscore and trailing stack argument
66   //   size - like "_Func@0".
67   // - In MinGW def files, a decorated stdcall symbol does not include the
68   //   leading underscore though, like "Func@0".
69 
70   // This function controls whether a leading underscore should be added to
71   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74   // as decorated, i.e. don't add any more leading underscores.
75   // We can't check for a leading underscore here, since function names
76   // themselves can start with an underscore, while a second one still needs
77   // to be added.
78   return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
79          (!MingwDef && Sym.contains('@'));
80 }
81 
82 class Lexer {
83 public:
84   Lexer(StringRef S) : Buf(S) {}
85 
86   Token lex() {
87     Buf = Buf.trim();
88     if (Buf.empty())
89       return Token(Eof);
90 
91     switch (Buf[0]) {
92     case '\0':
93       return Token(Eof);
94     case ';': {
95       size_t End = Buf.find('\n');
96       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
97       return lex();
98     }
99     case '=':
100       Buf = Buf.drop_front();
101       if (Buf.consume_front("="))
102         return Token(EqualEqual, "==");
103       return Token(Equal, "=");
104     case ',':
105       Buf = Buf.drop_front();
106       return Token(Comma, ",");
107     case '"': {
108       StringRef S;
109       std::tie(S, Buf) = Buf.substr(1).split('"');
110       return Token(Identifier, S);
111     }
112     default: {
113       size_t End = Buf.find_first_of("=,;\r\n \t\v");
114       StringRef Word = Buf.substr(0, End);
115       Kind K = llvm::StringSwitch<Kind>(Word)
116                    .Case("BASE", KwBase)
117                    .Case("CONSTANT", KwConstant)
118                    .Case("DATA", KwData)
119                    .Case("EXPORTS", KwExports)
120                    .Case("EXPORTAS", KwExportAs)
121                    .Case("HEAPSIZE", KwHeapsize)
122                    .Case("LIBRARY", KwLibrary)
123                    .Case("NAME", KwName)
124                    .Case("NONAME", KwNoname)
125                    .Case("PRIVATE", KwPrivate)
126                    .Case("STACKSIZE", KwStacksize)
127                    .Case("VERSION", KwVersion)
128                    .Default(Identifier);
129       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130       return Token(K, Word);
131     }
132     }
133   }
134 
135 private:
136   StringRef Buf;
137 };
138 
139 class Parser {
140 public:
141   explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142       : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143     if (Machine != IMAGE_FILE_MACHINE_I386)
144       AddUnderscores = false;
145   }
146 
147   Expected<COFFModuleDefinition> parse() {
148     do {
149       if (Error Err = parseOne())
150         return std::move(Err);
151     } while (Tok.K != Eof);
152     return Info;
153   }
154 
155 private:
156   void read() {
157     if (Stack.empty()) {
158       Tok = Lex.lex();
159       return;
160     }
161     Tok = Stack.back();
162     Stack.pop_back();
163   }
164 
165   Error readAsInt(uint64_t *I) {
166     read();
167     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168       return createError("integer expected");
169     return Error::success();
170   }
171 
172   Error expect(Kind Expected, StringRef Msg) {
173     read();
174     if (Tok.K != Expected)
175       return createError(Msg);
176     return Error::success();
177   }
178 
179   void unget() { Stack.push_back(Tok); }
180 
181   Error parseOne() {
182     read();
183     switch (Tok.K) {
184     case Eof:
185       return Error::success();
186     case KwExports:
187       for (;;) {
188         read();
189         if (Tok.K != Identifier) {
190           unget();
191           return Error::success();
192         }
193         if (Error Err = parseExport())
194           return Err;
195       }
196     case KwHeapsize:
197       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198     case KwStacksize:
199       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200     case KwLibrary:
201     case KwName: {
202       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203       std::string Name;
204       if (Error Err = parseName(&Name, &Info.ImageBase))
205         return Err;
206 
207       Info.ImportName = Name;
208 
209       // Set the output file, but don't override /out if it was already passed.
210       if (Info.OutputFile.empty()) {
211         Info.OutputFile = Name;
212         // Append the appropriate file extension if not already present.
213         if (!sys::path::has_extension(Name))
214           Info.OutputFile += IsDll ? ".dll" : ".exe";
215       }
216 
217       return Error::success();
218     }
219     case KwVersion:
220       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221     default:
222       return createError("unknown directive: " + Tok.Value);
223     }
224   }
225 
226   Error parseExport() {
227     COFFShortExport E;
228     E.Name = std::string(Tok.Value);
229     read();
230     if (Tok.K == Equal) {
231       read();
232       if (Tok.K != Identifier)
233         return createError("identifier expected, but got " + Tok.Value);
234       E.ExtName = E.Name;
235       E.Name = std::string(Tok.Value);
236     } else {
237       unget();
238     }
239 
240     if (AddUnderscores) {
241       if (!isDecorated(E.Name, MingwDef))
242         E.Name = (std::string("_").append(E.Name));
243       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
244         E.ExtName = (std::string("_").append(E.ExtName));
245     }
246 
247     for (;;) {
248       read();
249       if (Tok.K == Identifier && Tok.Value[0] == '@') {
250         if (Tok.Value == "@") {
251           // "foo @ 10"
252           read();
253           Tok.Value.getAsInteger(10, E.Ordinal);
254         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
255           // "foo \n @bar" - Not an ordinal modifier at all, but the next
256           // export (fastcall decorated) - complete the current one.
257           unget();
258           Info.Exports.push_back(E);
259           return Error::success();
260         }
261         // "foo @10"
262         read();
263         if (Tok.K == KwNoname) {
264           E.Noname = true;
265         } else {
266           unget();
267         }
268         continue;
269       }
270       if (Tok.K == KwData) {
271         E.Data = true;
272         continue;
273       }
274       if (Tok.K == KwConstant) {
275         E.Constant = true;
276         continue;
277       }
278       if (Tok.K == KwPrivate) {
279         E.Private = true;
280         continue;
281       }
282       if (Tok.K == EqualEqual) {
283         read();
284         E.ImportName = std::string(Tok.Value);
285         continue;
286       }
287       // EXPORTAS must be at the end of export definition
288       if (Tok.K == KwExportAs) {
289         read();
290         if (Tok.K == Eof)
291           return createError(
292               "unexpected end of file, EXPORTAS identifier expected");
293         E.ExportAs = std::string(Tok.Value);
294       } else {
295         unget();
296       }
297       Info.Exports.push_back(E);
298       return Error::success();
299     }
300   }
301 
302   // HEAPSIZE/STACKSIZE reserve[,commit]
303   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
304     if (Error Err = readAsInt(Reserve))
305       return Err;
306     read();
307     if (Tok.K != Comma) {
308       unget();
309       Commit = nullptr;
310       return Error::success();
311     }
312     if (Error Err = readAsInt(Commit))
313       return Err;
314     return Error::success();
315   }
316 
317   // NAME outputPath [BASE=address]
318   Error parseName(std::string *Out, uint64_t *Baseaddr) {
319     read();
320     if (Tok.K == Identifier) {
321       *Out = std::string(Tok.Value);
322     } else {
323       *Out = "";
324       unget();
325       return Error::success();
326     }
327     read();
328     if (Tok.K == KwBase) {
329       if (Error Err = expect(Equal, "'=' expected"))
330         return Err;
331       if (Error Err = readAsInt(Baseaddr))
332         return Err;
333     } else {
334       unget();
335       *Baseaddr = 0;
336     }
337     return Error::success();
338   }
339 
340   // VERSION major[.minor]
341   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
342     read();
343     if (Tok.K != Identifier)
344       return createError("identifier expected, but got " + Tok.Value);
345     StringRef V1, V2;
346     std::tie(V1, V2) = Tok.Value.split('.');
347     if (V1.getAsInteger(10, *Major))
348       return createError("integer expected, but got " + Tok.Value);
349     if (V2.empty())
350       *Minor = 0;
351     else if (V2.getAsInteger(10, *Minor))
352       return createError("integer expected, but got " + Tok.Value);
353     return Error::success();
354   }
355 
356   Lexer Lex;
357   Token Tok;
358   std::vector<Token> Stack;
359   MachineTypes Machine;
360   COFFModuleDefinition Info;
361   bool MingwDef;
362   bool AddUnderscores;
363 };
364 
365 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
366                                                          MachineTypes Machine,
367                                                          bool MingwDef,
368                                                          bool AddUnderscores) {
369   return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
370 }
371 
372 } // namespace object
373 } // namespace llvm
374