1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24
25 using namespace llvm::COFF;
26 using namespace llvm;
27
28 namespace llvm {
29 namespace object {
30
// Token kinds produced by the Lexer for module-definition files.
enum Kind {
  Unknown,     // Default kind of a default-constructed Token.
  Eof,         // End of input (empty buffer or a NUL character).
  Identifier,  // Quoted string, or any bare word that is not a keyword.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwExportAs,  // "EXPORTAS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
51
52 struct Token {
Tokenllvm::object::Token53 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54 Kind K;
55 StringRef Value;
56 };
57
isDecorated(StringRef Sym,bool MingwDef)58 static bool isDecorated(StringRef Sym, bool MingwDef) {
59 // In def files, the symbols can either be listed decorated or undecorated.
60 //
61 // - For cdecl symbols, only the undecorated form is allowed.
62 // - For fastcall and vectorcall symbols, both fully decorated or
63 // undecorated forms can be present.
64 // - For stdcall symbols in non-MinGW environments, the decorated form is
65 // fully decorated with leading underscore and trailing stack argument
66 // size - like "_Func@0".
67 // - In MinGW def files, a decorated stdcall symbol does not include the
68 // leading underscore though, like "Func@0".
69
70 // This function controls whether a leading underscore should be added to
71 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74 // as decorated, i.e. don't add any more leading underscores.
75 // We can't check for a leading underscore here, since function names
76 // themselves can start with an underscore, while a second one still needs
77 // to be added.
78 return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
79 (!MingwDef && Sym.contains('@'));
80 }
81
82 class Lexer {
83 public:
Lexer(StringRef S)84 Lexer(StringRef S) : Buf(S) {}
85
lex()86 Token lex() {
87 Buf = Buf.trim();
88 if (Buf.empty())
89 return Token(Eof);
90
91 switch (Buf[0]) {
92 case '\0':
93 return Token(Eof);
94 case ';': {
95 size_t End = Buf.find('\n');
96 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
97 return lex();
98 }
99 case '=':
100 Buf = Buf.drop_front();
101 if (Buf.consume_front("="))
102 return Token(EqualEqual, "==");
103 return Token(Equal, "=");
104 case ',':
105 Buf = Buf.drop_front();
106 return Token(Comma, ",");
107 case '"': {
108 StringRef S;
109 std::tie(S, Buf) = Buf.substr(1).split('"');
110 return Token(Identifier, S);
111 }
112 default: {
113 size_t End = Buf.find_first_of("=,;\r\n \t\v");
114 StringRef Word = Buf.substr(0, End);
115 Kind K = llvm::StringSwitch<Kind>(Word)
116 .Case("BASE", KwBase)
117 .Case("CONSTANT", KwConstant)
118 .Case("DATA", KwData)
119 .Case("EXPORTS", KwExports)
120 .Case("EXPORTAS", KwExportAs)
121 .Case("HEAPSIZE", KwHeapsize)
122 .Case("LIBRARY", KwLibrary)
123 .Case("NAME", KwName)
124 .Case("NONAME", KwNoname)
125 .Case("PRIVATE", KwPrivate)
126 .Case("STACKSIZE", KwStacksize)
127 .Case("VERSION", KwVersion)
128 .Default(Identifier);
129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130 return Token(K, Word);
131 }
132 }
133 }
134
135 private:
136 StringRef Buf;
137 };
138
139 class Parser {
140 public:
Parser(StringRef S,MachineTypes M,bool B,bool AU)141 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143 if (Machine != IMAGE_FILE_MACHINE_I386)
144 AddUnderscores = false;
145 }
146
parse()147 Expected<COFFModuleDefinition> parse() {
148 do {
149 if (Error Err = parseOne())
150 return std::move(Err);
151 } while (Tok.K != Eof);
152 return Info;
153 }
154
155 private:
read()156 void read() {
157 if (Stack.empty()) {
158 Tok = Lex.lex();
159 return;
160 }
161 Tok = Stack.back();
162 Stack.pop_back();
163 }
164
readAsInt(uint64_t * I)165 Error readAsInt(uint64_t *I) {
166 read();
167 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168 return createError("integer expected");
169 return Error::success();
170 }
171
expect(Kind Expected,StringRef Msg)172 Error expect(Kind Expected, StringRef Msg) {
173 read();
174 if (Tok.K != Expected)
175 return createError(Msg);
176 return Error::success();
177 }
178
unget()179 void unget() { Stack.push_back(Tok); }
180
parseOne()181 Error parseOne() {
182 read();
183 switch (Tok.K) {
184 case Eof:
185 return Error::success();
186 case KwExports:
187 for (;;) {
188 read();
189 if (Tok.K != Identifier) {
190 unget();
191 return Error::success();
192 }
193 if (Error Err = parseExport())
194 return Err;
195 }
196 case KwHeapsize:
197 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198 case KwStacksize:
199 return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200 case KwLibrary:
201 case KwName: {
202 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203 std::string Name;
204 if (Error Err = parseName(&Name, &Info.ImageBase))
205 return Err;
206
207 Info.ImportName = Name;
208
209 // Set the output file, but don't override /out if it was already passed.
210 if (Info.OutputFile.empty()) {
211 Info.OutputFile = Name;
212 // Append the appropriate file extension if not already present.
213 if (!sys::path::has_extension(Name))
214 Info.OutputFile += IsDll ? ".dll" : ".exe";
215 }
216
217 return Error::success();
218 }
219 case KwVersion:
220 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221 default:
222 return createError("unknown directive: " + Tok.Value);
223 }
224 }
225
parseExport()226 Error parseExport() {
227 COFFShortExport E;
228 E.Name = std::string(Tok.Value);
229 read();
230 if (Tok.K == Equal) {
231 read();
232 if (Tok.K != Identifier)
233 return createError("identifier expected, but got " + Tok.Value);
234 E.ExtName = E.Name;
235 E.Name = std::string(Tok.Value);
236 } else {
237 unget();
238 }
239
240 if (AddUnderscores) {
241 if (!isDecorated(E.Name, MingwDef))
242 E.Name = (std::string("_").append(E.Name));
243 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
244 E.ExtName = (std::string("_").append(E.ExtName));
245 }
246
247 for (;;) {
248 read();
249 if (Tok.K == Identifier && Tok.Value[0] == '@') {
250 if (Tok.Value == "@") {
251 // "foo @ 10"
252 read();
253 Tok.Value.getAsInteger(10, E.Ordinal);
254 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
255 // "foo \n @bar" - Not an ordinal modifier at all, but the next
256 // export (fastcall decorated) - complete the current one.
257 unget();
258 Info.Exports.push_back(E);
259 return Error::success();
260 }
261 // "foo @10"
262 read();
263 if (Tok.K == KwNoname) {
264 E.Noname = true;
265 } else {
266 unget();
267 }
268 continue;
269 }
270 if (Tok.K == KwData) {
271 E.Data = true;
272 continue;
273 }
274 if (Tok.K == KwConstant) {
275 E.Constant = true;
276 continue;
277 }
278 if (Tok.K == KwPrivate) {
279 E.Private = true;
280 continue;
281 }
282 if (Tok.K == EqualEqual) {
283 read();
284 E.ImportName = std::string(Tok.Value);
285 continue;
286 }
287 // EXPORTAS must be at the end of export definition
288 if (Tok.K == KwExportAs) {
289 read();
290 if (Tok.K == Eof)
291 return createError(
292 "unexpected end of file, EXPORTAS identifier expected");
293 E.ExportAs = std::string(Tok.Value);
294 } else {
295 unget();
296 }
297 Info.Exports.push_back(E);
298 return Error::success();
299 }
300 }
301
302 // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)303 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
304 if (Error Err = readAsInt(Reserve))
305 return Err;
306 read();
307 if (Tok.K != Comma) {
308 unget();
309 Commit = nullptr;
310 return Error::success();
311 }
312 if (Error Err = readAsInt(Commit))
313 return Err;
314 return Error::success();
315 }
316
317 // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)318 Error parseName(std::string *Out, uint64_t *Baseaddr) {
319 read();
320 if (Tok.K == Identifier) {
321 *Out = std::string(Tok.Value);
322 } else {
323 *Out = "";
324 unget();
325 return Error::success();
326 }
327 read();
328 if (Tok.K == KwBase) {
329 if (Error Err = expect(Equal, "'=' expected"))
330 return Err;
331 if (Error Err = readAsInt(Baseaddr))
332 return Err;
333 } else {
334 unget();
335 *Baseaddr = 0;
336 }
337 return Error::success();
338 }
339
340 // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)341 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
342 read();
343 if (Tok.K != Identifier)
344 return createError("identifier expected, but got " + Tok.Value);
345 StringRef V1, V2;
346 std::tie(V1, V2) = Tok.Value.split('.');
347 if (V1.getAsInteger(10, *Major))
348 return createError("integer expected, but got " + Tok.Value);
349 if (V2.empty())
350 *Minor = 0;
351 else if (V2.getAsInteger(10, *Minor))
352 return createError("integer expected, but got " + Tok.Value);
353 return Error::success();
354 }
355
356 Lexer Lex;
357 Token Tok;
358 std::vector<Token> Stack;
359 MachineTypes Machine;
360 COFFModuleDefinition Info;
361 bool MingwDef;
362 bool AddUnderscores;
363 };
364
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef,bool AddUnderscores)365 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
366 MachineTypes Machine,
367 bool MingwDef,
368 bool AddUnderscores) {
369 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
370 }
371
372 } // namespace object
373 } // namespace llvm
374