xref: /freebsd/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
// Token categories produced by the lexer. The enumerators keep their
// implicit, consecutive values starting at zero.
enum Kind {
  // Default kind of a default-constructed Token.
  Unknown,
  // End of input.
  Eof,

  // Any word or quoted string that is not one of the keywords below.
  Identifier,

  // Punctuation: ",", "=" and "==".
  Comma,
  Equal,
  EqualEqual,

  // Keywords; each is matched against the upper-case spelling of its name
  // (e.g. KwBase for "BASE").
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwExportAs,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
51 
52 struct Token {
53   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54   Kind K;
55   StringRef Value;
56 };
57 
58 static bool isDecorated(StringRef Sym, bool MingwDef) {
59   // In def files, the symbols can either be listed decorated or undecorated.
60   //
61   // - For cdecl symbols, only the undecorated form is allowed.
62   // - For fastcall and vectorcall symbols, both fully decorated or
63   //   undecorated forms can be present.
64   // - For stdcall symbols in non-MinGW environments, the decorated form is
65   //   fully decorated with leading underscore and trailing stack argument
66   //   size - like "_Func@0".
67   // - In MinGW def files, a decorated stdcall symbol does not include the
68   //   leading underscore though, like "Func@0".
69 
70   // This function controls whether a leading underscore should be added to
71   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74   // as decorated, i.e. don't add any more leading underscores.
75   // We can't check for a leading underscore here, since function names
76   // themselves can start with an underscore, while a second one still needs
77   // to be added.
78   return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
79          (!MingwDef && Sym.contains('@'));
80 }
81 
82 class Lexer {
83 public:
84   Lexer(StringRef S) : Buf(S) {}
85 
86   Token lex() {
87     Buf = Buf.trim();
88     if (Buf.empty())
89       return Token(Eof);
90 
91     switch (Buf[0]) {
92     case '\0':
93       return Token(Eof);
94     case ';': {
95       size_t End = Buf.find('\n');
96       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
97       return lex();
98     }
99     case '=':
100       Buf = Buf.drop_front();
101       if (Buf.consume_front("="))
102         return Token(EqualEqual, "==");
103       return Token(Equal, "=");
104     case ',':
105       Buf = Buf.drop_front();
106       return Token(Comma, ",");
107     case '"': {
108       StringRef S;
109       std::tie(S, Buf) = Buf.substr(1).split('"');
110       return Token(Identifier, S);
111     }
112     default: {
113       size_t End = Buf.find_first_of("=,;\r\n \t\v");
114       StringRef Word = Buf.substr(0, End);
115       Kind K = llvm::StringSwitch<Kind>(Word)
116                    .Case("BASE", KwBase)
117                    .Case("CONSTANT", KwConstant)
118                    .Case("DATA", KwData)
119                    .Case("EXPORTS", KwExports)
120                    .Case("EXPORTAS", KwExportAs)
121                    .Case("HEAPSIZE", KwHeapsize)
122                    .Case("LIBRARY", KwLibrary)
123                    .Case("NAME", KwName)
124                    .Case("NONAME", KwNoname)
125                    .Case("PRIVATE", KwPrivate)
126                    .Case("STACKSIZE", KwStacksize)
127                    .Case("VERSION", KwVersion)
128                    .Default(Identifier);
129       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130       return Token(K, Word);
131     }
132     }
133   }
134 
135 private:
136   StringRef Buf;
137 };
138 
139 class Parser {
140 public:
141   explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142       : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143     if (Machine != IMAGE_FILE_MACHINE_I386)
144       AddUnderscores = false;
145   }
146 
147   Expected<COFFModuleDefinition> parse() {
148     do {
149       if (Error Err = parseOne())
150         return std::move(Err);
151     } while (Tok.K != Eof);
152     return Info;
153   }
154 
155 private:
156   void read() {
157     if (Stack.empty()) {
158       Tok = Lex.lex();
159       return;
160     }
161     Tok = Stack.back();
162     Stack.pop_back();
163   }
164 
165   Error readAsInt(uint64_t *I) {
166     read();
167     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168       return createError("integer expected");
169     return Error::success();
170   }
171 
172   Error expect(Kind Expected, StringRef Msg) {
173     read();
174     if (Tok.K != Expected)
175       return createError(Msg);
176     return Error::success();
177   }
178 
179   void unget() { Stack.push_back(Tok); }
180 
181   Error parseOne() {
182     read();
183     switch (Tok.K) {
184     case Eof:
185       return Error::success();
186     case KwExports:
187       for (;;) {
188         read();
189         if (Tok.K != Identifier) {
190           unget();
191           return Error::success();
192         }
193         if (Error Err = parseExport())
194           return Err;
195       }
196     case KwHeapsize:
197       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198     case KwStacksize:
199       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200     case KwLibrary:
201     case KwName: {
202       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203       std::string Name;
204       if (Error Err = parseName(&Name, &Info.ImageBase))
205         return Err;
206 
207       Info.ImportName = Name;
208 
209       // Set the output file, but don't override /out if it was already passed.
210       if (Info.OutputFile.empty()) {
211         Info.OutputFile = Name;
212         // Append the appropriate file extension if not already present.
213         if (!sys::path::has_extension(Name))
214           Info.OutputFile += IsDll ? ".dll" : ".exe";
215       }
216 
217       return Error::success();
218     }
219     case KwVersion:
220       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221     default:
222       return createError("unknown directive: " + Tok.Value);
223     }
224   }
225 
226   Error parseExport() {
227     COFFShortExport E;
228     E.Name = std::string(Tok.Value);
229     read();
230     if (Tok.K == Equal) {
231       read();
232       if (Tok.K != Identifier)
233         return createError("identifier expected, but got " + Tok.Value);
234       E.ExtName = E.Name;
235       E.Name = std::string(Tok.Value);
236     } else {
237       unget();
238     }
239 
240     if (AddUnderscores) {
241       // Don't add underscore if the name is already mangled or if it's a
242       // forward target.
243       if (!isDecorated(E.Name, MingwDef) &&
244           (E.ExtName.empty() || !StringRef(E.Name).contains(".")))
245         E.Name = (std::string("_").append(E.Name));
246       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
247         E.ExtName = (std::string("_").append(E.ExtName));
248     }
249 
250     for (;;) {
251       read();
252       if (Tok.K == Identifier && Tok.Value[0] == '@') {
253         if (Tok.Value == "@") {
254           // "foo @ 10"
255           read();
256           Tok.Value.getAsInteger(10, E.Ordinal);
257         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
258           // "foo \n @bar" - Not an ordinal modifier at all, but the next
259           // export (fastcall decorated) - complete the current one.
260           unget();
261           Info.Exports.push_back(E);
262           return Error::success();
263         }
264         // "foo @10"
265         read();
266         if (Tok.K == KwNoname) {
267           E.Noname = true;
268         } else {
269           unget();
270         }
271         continue;
272       }
273       if (Tok.K == KwData) {
274         E.Data = true;
275         continue;
276       }
277       if (Tok.K == KwConstant) {
278         E.Constant = true;
279         continue;
280       }
281       if (Tok.K == KwPrivate) {
282         E.Private = true;
283         continue;
284       }
285       if (Tok.K == EqualEqual) {
286         read();
287         E.ImportName = std::string(Tok.Value);
288         continue;
289       }
290       // EXPORTAS must be at the end of export definition
291       if (Tok.K == KwExportAs) {
292         read();
293         if (Tok.K == Eof)
294           return createError(
295               "unexpected end of file, EXPORTAS identifier expected");
296         E.ExportAs = std::string(Tok.Value);
297       } else {
298         unget();
299       }
300       Info.Exports.push_back(E);
301       return Error::success();
302     }
303   }
304 
305   // HEAPSIZE/STACKSIZE reserve[,commit]
306   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
307     if (Error Err = readAsInt(Reserve))
308       return Err;
309     read();
310     if (Tok.K != Comma) {
311       unget();
312       Commit = nullptr;
313       return Error::success();
314     }
315     if (Error Err = readAsInt(Commit))
316       return Err;
317     return Error::success();
318   }
319 
320   // NAME outputPath [BASE=address]
321   Error parseName(std::string *Out, uint64_t *Baseaddr) {
322     read();
323     if (Tok.K == Identifier) {
324       *Out = std::string(Tok.Value);
325     } else {
326       *Out = "";
327       unget();
328       return Error::success();
329     }
330     read();
331     if (Tok.K == KwBase) {
332       if (Error Err = expect(Equal, "'=' expected"))
333         return Err;
334       if (Error Err = readAsInt(Baseaddr))
335         return Err;
336     } else {
337       unget();
338       *Baseaddr = 0;
339     }
340     return Error::success();
341   }
342 
343   // VERSION major[.minor]
344   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
345     read();
346     if (Tok.K != Identifier)
347       return createError("identifier expected, but got " + Tok.Value);
348     StringRef V1, V2;
349     std::tie(V1, V2) = Tok.Value.split('.');
350     if (V1.getAsInteger(10, *Major))
351       return createError("integer expected, but got " + Tok.Value);
352     if (V2.empty())
353       *Minor = 0;
354     else if (V2.getAsInteger(10, *Minor))
355       return createError("integer expected, but got " + Tok.Value);
356     return Error::success();
357   }
358 
359   Lexer Lex;
360   Token Tok;
361   std::vector<Token> Stack;
362   MachineTypes Machine;
363   COFFModuleDefinition Info;
364   bool MingwDef;
365   bool AddUnderscores;
366 };
367 
368 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
369                                                          MachineTypes Machine,
370                                                          bool MingwDef,
371                                                          bool AddUnderscores) {
372   return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
373 }
374 
375 } // namespace object
376 } // namespace llvm
377