xref: /freebsd/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp (revision ca53e5aedfebcc1b4091b68e01b2d5cae923f85e)
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
12 // The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/COFFImportFile.h"
22 #include "llvm/Object/Error.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm::COFF;
28 using namespace llvm;
29 
30 namespace llvm {
31 namespace object {
32 
// Categories of tokens handed from the lexer to the parser. The Kw* kinds
// stand for the upper-case reserved words of the module-definition format.
enum Kind {
  // Structural tokens.
  Unknown = 0, // Kind of a default-constructed Token.
  Eof,         // End of input.
  Identifier,  // Bare word or quoted string.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="

  // Reserved words.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
52 
53 struct Token {
54   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55   Kind K;
56   StringRef Value;
57 };
58 
59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60   // In def files, the symbols can either be listed decorated or undecorated.
61   //
62   // - For cdecl symbols, only the undecorated form is allowed.
63   // - For fastcall and vectorcall symbols, both fully decorated or
64   //   undecorated forms can be present.
65   // - For stdcall symbols in non-MinGW environments, the decorated form is
66   //   fully decorated with leading underscore and trailing stack argument
67   //   size - like "_Func@0".
68   // - In MinGW def files, a decorated stdcall symbol does not include the
69   //   leading underscore though, like "Func@0".
70 
71   // This function controls whether a leading underscore should be added to
72   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75   // as decorated, i.e. don't add any more leading underscores.
76   // We can't check for a leading underscore here, since function names
77   // themselves can start with an underscore, while a second one still needs
78   // to be added.
79   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80          (!MingwDef && Sym.contains('@'));
81 }
82 
83 static Error createError(const Twine &Err) {
84   return make_error<StringError>(StringRef(Err.str()),
85                                  object_error::parse_failed);
86 }
87 
88 class Lexer {
89 public:
90   Lexer(StringRef S) : Buf(S) {}
91 
92   Token lex() {
93     Buf = Buf.trim();
94     if (Buf.empty())
95       return Token(Eof);
96 
97     switch (Buf[0]) {
98     case '\0':
99       return Token(Eof);
100     case ';': {
101       size_t End = Buf.find('\n');
102       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103       return lex();
104     }
105     case '=':
106       Buf = Buf.drop_front();
107       if (Buf.startswith("=")) {
108         Buf = Buf.drop_front();
109         return Token(EqualEqual, "==");
110       }
111       return Token(Equal, "=");
112     case ',':
113       Buf = Buf.drop_front();
114       return Token(Comma, ",");
115     case '"': {
116       StringRef S;
117       std::tie(S, Buf) = Buf.substr(1).split('"');
118       return Token(Identifier, S);
119     }
120     default: {
121       size_t End = Buf.find_first_of("=,;\r\n \t\v");
122       StringRef Word = Buf.substr(0, End);
123       Kind K = llvm::StringSwitch<Kind>(Word)
124                    .Case("BASE", KwBase)
125                    .Case("CONSTANT", KwConstant)
126                    .Case("DATA", KwData)
127                    .Case("EXPORTS", KwExports)
128                    .Case("HEAPSIZE", KwHeapsize)
129                    .Case("LIBRARY", KwLibrary)
130                    .Case("NAME", KwName)
131                    .Case("NONAME", KwNoname)
132                    .Case("PRIVATE", KwPrivate)
133                    .Case("STACKSIZE", KwStacksize)
134                    .Case("VERSION", KwVersion)
135                    .Default(Identifier);
136       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137       return Token(K, Word);
138     }
139     }
140   }
141 
142 private:
143   StringRef Buf;
144 };
145 
146 class Parser {
147 public:
148   explicit Parser(StringRef S, MachineTypes M, bool B)
149       : Lex(S), Machine(M), MingwDef(B) {}
150 
151   Expected<COFFModuleDefinition> parse() {
152     do {
153       if (Error Err = parseOne())
154         return std::move(Err);
155     } while (Tok.K != Eof);
156     return Info;
157   }
158 
159 private:
160   void read() {
161     if (Stack.empty()) {
162       Tok = Lex.lex();
163       return;
164     }
165     Tok = Stack.back();
166     Stack.pop_back();
167   }
168 
169   Error readAsInt(uint64_t *I) {
170     read();
171     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172       return createError("integer expected");
173     return Error::success();
174   }
175 
176   Error expect(Kind Expected, StringRef Msg) {
177     read();
178     if (Tok.K != Expected)
179       return createError(Msg);
180     return Error::success();
181   }
182 
183   void unget() { Stack.push_back(Tok); }
184 
185   Error parseOne() {
186     read();
187     switch (Tok.K) {
188     case Eof:
189       return Error::success();
190     case KwExports:
191       for (;;) {
192         read();
193         if (Tok.K != Identifier) {
194           unget();
195           return Error::success();
196         }
197         if (Error Err = parseExport())
198           return Err;
199       }
200     case KwHeapsize:
201       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202     case KwStacksize:
203       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204     case KwLibrary:
205     case KwName: {
206       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207       std::string Name;
208       if (Error Err = parseName(&Name, &Info.ImageBase))
209         return Err;
210 
211       Info.ImportName = Name;
212 
213       // Set the output file, but don't override /out if it was already passed.
214       if (Info.OutputFile.empty()) {
215         Info.OutputFile = Name;
216         // Append the appropriate file extension if not already present.
217         if (!sys::path::has_extension(Name))
218           Info.OutputFile += IsDll ? ".dll" : ".exe";
219       }
220 
221       return Error::success();
222     }
223     case KwVersion:
224       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225     default:
226       return createError("unknown directive: " + Tok.Value);
227     }
228   }
229 
230   Error parseExport() {
231     COFFShortExport E;
232     E.Name = std::string(Tok.Value);
233     read();
234     if (Tok.K == Equal) {
235       read();
236       if (Tok.K != Identifier)
237         return createError("identifier expected, but got " + Tok.Value);
238       E.ExtName = E.Name;
239       E.Name = std::string(Tok.Value);
240     } else {
241       unget();
242     }
243 
244     if (Machine == IMAGE_FILE_MACHINE_I386) {
245       if (!isDecorated(E.Name, MingwDef))
246         E.Name = (std::string("_").append(E.Name));
247       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248         E.ExtName = (std::string("_").append(E.ExtName));
249     }
250 
251     for (;;) {
252       read();
253       if (Tok.K == Identifier && Tok.Value[0] == '@') {
254         if (Tok.Value == "@") {
255           // "foo @ 10"
256           read();
257           Tok.Value.getAsInteger(10, E.Ordinal);
258         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
259           // "foo \n @bar" - Not an ordinal modifier at all, but the next
260           // export (fastcall decorated) - complete the current one.
261           unget();
262           Info.Exports.push_back(E);
263           return Error::success();
264         }
265         // "foo @10"
266         read();
267         if (Tok.K == KwNoname) {
268           E.Noname = true;
269         } else {
270           unget();
271         }
272         continue;
273       }
274       if (Tok.K == KwData) {
275         E.Data = true;
276         continue;
277       }
278       if (Tok.K == KwConstant) {
279         E.Constant = true;
280         continue;
281       }
282       if (Tok.K == KwPrivate) {
283         E.Private = true;
284         continue;
285       }
286       if (Tok.K == EqualEqual) {
287         read();
288         E.AliasTarget = std::string(Tok.Value);
289         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290           E.AliasTarget = std::string("_").append(E.AliasTarget);
291         continue;
292       }
293       unget();
294       Info.Exports.push_back(E);
295       return Error::success();
296     }
297   }
298 
299   // HEAPSIZE/STACKSIZE reserve[,commit]
300   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301     if (Error Err = readAsInt(Reserve))
302       return Err;
303     read();
304     if (Tok.K != Comma) {
305       unget();
306       Commit = nullptr;
307       return Error::success();
308     }
309     if (Error Err = readAsInt(Commit))
310       return Err;
311     return Error::success();
312   }
313 
314   // NAME outputPath [BASE=address]
315   Error parseName(std::string *Out, uint64_t *Baseaddr) {
316     read();
317     if (Tok.K == Identifier) {
318       *Out = std::string(Tok.Value);
319     } else {
320       *Out = "";
321       unget();
322       return Error::success();
323     }
324     read();
325     if (Tok.K == KwBase) {
326       if (Error Err = expect(Equal, "'=' expected"))
327         return Err;
328       if (Error Err = readAsInt(Baseaddr))
329         return Err;
330     } else {
331       unget();
332       *Baseaddr = 0;
333     }
334     return Error::success();
335   }
336 
337   // VERSION major[.minor]
338   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339     read();
340     if (Tok.K != Identifier)
341       return createError("identifier expected, but got " + Tok.Value);
342     StringRef V1, V2;
343     std::tie(V1, V2) = Tok.Value.split('.');
344     if (V1.getAsInteger(10, *Major))
345       return createError("integer expected, but got " + Tok.Value);
346     if (V2.empty())
347       *Minor = 0;
348     else if (V2.getAsInteger(10, *Minor))
349       return createError("integer expected, but got " + Tok.Value);
350     return Error::success();
351   }
352 
353   Lexer Lex;
354   Token Tok;
355   std::vector<Token> Stack;
356   MachineTypes Machine;
357   COFFModuleDefinition Info;
358   bool MingwDef;
359 };
360 
361 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
362                                                          MachineTypes Machine,
363                                                          bool MingwDef) {
364   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365 }
366 
367 } // namespace object
368 } // namespace llvm
369