1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1988 AT&T
24 * All Rights Reserved
25 *
26 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
27 *
28 * Copyright 2019 Joyent, Inc.
29 */
30
31 /*
32 * Map file parsing (Shared Core Code).
33 */
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <unistd.h>
37 #include <sys/stat.h>
38 #include <errno.h>
39 #include <limits.h>
40 #include <dirent.h>
41 #include <ctype.h>
42 #include <debug.h>
43 #include "msg.h"
44 #include "_libld.h"
45 #include "_map.h"
46
47 /*
48 * There are two styles of mapfile supported by the link-editor:
49 *
50 * 1) The original System V defined syntax, as augmented at Sun
51 * from Solaris 2.0 through Solaris 10. This style is also known
52 * as version 1.
53 *
54 * 2) A newer syntax, currently at version 2.
55 *
56 * The original syntax uses special characters (=, :, -, |, etc) as
57 * operators to indicate the operation being specified. Over the years,
58 * this syntax has been problematic:
59 *
60 * 1) Too cryptic: It's hard for people to remember which character
61 * means what.
62 *
 * 2) Limited expansion potential: There are only a few special characters
 * available on the keyboard for new features, and it is difficult to
 * add options to existing ones.
66 *
 * Adding new features into this framework (2) has the effect of
 * making the syntax even more cryptic (1). The newer syntax addresses
 * these issues by moving to an extendible identifier-based syntax that
 * allows new features to be added without complicating old ones.
71 *
72 * The new syntax uses the following terminology:
73 *
74 * - Control directives are the directives that start with a '$'.
75 * They control how the mapfile is interpreted. We use the 'cdir_'
76 * prefix on functions and variables related to these directives.
77 *
78 * - Conditional Expressions are the expressions found in $if and $elif
79 * control directives. They evaluate to boolean true/false values.
80 * We use the 'cexp_' prefix for functions and variables related to
81 * these expressions.
82 *
83 * - Regular Directives are names (SYMBOL, VERSION, etc) that convey
84 * directions to the link-editor for building the output object.
85 *
86 * This file contains core code used by both mapfile styles: File management,
87 * lexical analysis, and other shared core functionality. It also contains
88 * the code for control directives, as they are intrinsically part of
89 * lexical analysis --- this is disabled when processing Sysv mapfiles.
90 */
91
92 /*
93 * We use a stack of cdir_level_t structs to manage $if/$elif/$else/$endif
94 * processing. At each level, we keep track of the information needed to
95 * determine whether or not to process nested input lines or skip them,
96 * along with information needed to report errors.
97 */
98 typedef struct {
99 Lineno cdl_if_lineno; /* Line number of opening $if */
100 Lineno cdl_else_lineno; /* 0, or line on which $else seen */
101 int cdl_done; /* True if no longer accepts input */
102 int cdl_pass; /* True if currently accepting input */
103 } cdir_level_t;
104
/* Operator codes recognized in $if/$elif conditional expressions */
typedef enum {
	CEXP_OP_NONE = 0,	/* Not an operator */
	CEXP_OP_AND = 1,	/* && */
	CEXP_OP_OR = 2,		/* || */
	CEXP_OP_NEG = 3,	/* ! */
	CEXP_OP_OPAR = 4,	/* ( */
	CEXP_OP_CPAR = 5	/* ) */
} cexp_op_t;
114
115 /*
116 * Type of conditional expression identifier AVL tree nodes
117 */
118 typedef struct cexp_name_node {
119 avl_node_t ceid_avlnode; /* AVL book-keeping */
120 const char *ceid_name; /* boolean identifier name */
121 } cexp_id_node_t;
122
123
/*
 * STACK(_type) declares an anonymous struct describing a growable stack
 * of _type elements: the element array, the number of elements in use,
 * and the number of elements the array has room for.
 */
#define	STACK(_type) \
	struct { \
		_type	*stk_s;		/* Element array */ \
		size_t	stk_n;		/* # of elements currently in use */ \
		size_t	stk_n_alloc;	/* # of elements stk_s can hold */ \
	}

/*
 * "Generic" stack whose element type is (void). It is never instantiated;
 * it exists so that stack_resize() can operate on any STACK() through a
 * cast, relying on every STACK() sharing this struct layout.
 */
typedef STACK(void) generic_stack_t;
143
/*
 * Make sure one more item can be pushed. Evaluates to non-zero if room
 * is already available or stack_resize() succeeded, and to 0 if the
 * resize failed. stack_resize() is only called when the stack is full.
 */
#define	STACK_RESERVE(_stack, _n_default) \
	(((_stack).stk_n_alloc > (_stack).stk_n) || \
	    stack_resize((generic_stack_t *)&(_stack).stk_s, (_n_default), \
	    sizeof ((_stack).stk_s[0])))
151
/*
 * Reset a stack to empty. The allocation is kept; only the depth is
 * zeroed.
 *
 * The expansion is a parenthesized expression with no trailing semicolon,
 * so the macro behaves like a function call: the caller supplies the ';'
 * and it is safe as the sole statement of an if/else arm.
 */
#define	STACK_RESET(_stack)	((_stack).stk_n = 0)
156
/*
 * Evaluates to non-zero when the stack holds no items, 0 otherwise.
 */
#define	STACK_IS_EMPTY(_stack)	(0 == (_stack).stk_n)
161
/*
 * Push a value. The macro yields the lvalue of the next free slot and
 * bumps the depth, so it is written as the left side of an assignment:
 *
 *	STACK_PUSH(stack) = value;
 *
 * No bounds check is made; the caller must already have reserved room.
 */
#define	STACK_PUSH(_stack)	(*((_stack).stk_s + (_stack).stk_n++))
170
/*
 * Pop the top value: decrements the depth and yields the element that
 * was on top. No check is made; the caller must know the stack is
 * not empty.
 */
#define	STACK_POP(_stack)	(*((_stack).stk_s + --(_stack).stk_n))
176
/*
 * Yield the lvalue of the top element, leaving the depth unchanged.
 * No check is made; the caller must know the stack is not empty.
 */
#define	STACK_TOP(_stack)	(*((_stack).stk_s + ((_stack).stk_n - 1)))
182
/*
 * Initial element counts for the stacks. Each stack is allocated on
 * demand at its initial size and doubled whenever it fills.
 *
 * The non-debug sizes are chosen so that a single allocation normally
 * suffices. That would let a latent bug in the resize path go unnoticed
 * until a large job hits it in the field, so DEBUG builds start at the
 * smallest possible size to exercise resizing.
 */
#ifndef	DEBUG
#define	CDIR_STACK_INIT		16
#define	CEXP_OP_STACK_INIT	8
#define	CEXP_VAL_STACK_INIT	(CEXP_OP_STACK_INIT * 2) /* 2 vals per binop */
#else
#define	CDIR_STACK_INIT		1
#define	CEXP_OP_STACK_INIT	1
#define	CEXP_VAL_STACK_INIT	1
#endif
202
203
204 /*
205 * Persistent state maintained by map module in between calls.
206 *
207 * This is kept as static file scope data, because it is only used
208 * when libld is called by ld, and not by rtld. If that should change,
209 * the code is designed so that it can become reentrant easily:
210 *
211 * - Add a pointer to the output descriptor to a structure of this type,
212 * allocated dynamically on the first call to ld_map_parse().
213 * - Change all references to lms to instead reference the pointer in
214 * the output descriptor.
215 *
216 * Until then, it is simpler not to expose these details.
217 */
218 typedef struct {
219 int lms_cdir_valid; /* Allow control dir. on entry to gettoken() */
220 STACK(cdir_level_t) lms_cdir_stack; /* Conditional input level */
221 STACK(cexp_op_t) lms_cexp_op_stack; /* Cond. expr operators */
222 STACK(uchar_t) lms_cexp_val_stack; /* Cond. expr values */
223 avl_tree_t *lms_cexp_id;
224 } ld_map_state_t;
225 static ld_map_state_t lms;
226
227
228 /*
229 * Version 1 (SysV) syntax dispatch table for ld_map_gettoken(). For each
230 * of the 7-bit ASCII characters, determine how the lexical analyzer
231 * should behave.
232 *
233 * This table must be kept in sync with tkid_attr[] below.
234 *
235 * Identifier Note:
236 * The Linker and Libraries Guide states that the original syntax uses
237 * C identifier rules, allowing '.' to be treated as a letter. However,
238 * the implementation is considerably looser than that: Any character
239 * with an ASCII code (0-127) which is printable and not used to start
240 * another token is allowed to start an identifier, and they are terminated
241 * by any of: space, double quote, tab, newline, ':', ';', '=', or '#'.
242 * The original code has been replaced, but this table encodes the same
243 * rules, to ensure backward compatibility.
244 */
245 static const mf_tokdisp_t gettok_dispatch_v1 = {
246 TK_OP_EOF, /* 0 - NUL */
247 TK_OP_ILLCHR, /* 1 - SOH */
248 TK_OP_ILLCHR, /* 2 - STX */
249 TK_OP_ILLCHR, /* 3 - ETX */
250 TK_OP_ILLCHR, /* 4 - EOT */
251 TK_OP_ILLCHR, /* 5 - ENQ */
252 TK_OP_ILLCHR, /* 6 - ACK */
253 TK_OP_ILLCHR, /* 7 - BEL */
254 TK_OP_ILLCHR, /* 8 - BS */
255 TK_OP_WS, /* 9 - HT */
256 TK_OP_NL, /* 10 - NL */
257 TK_OP_WS, /* 11 - VT */
258 TK_OP_WS, /* 12 - FF */
259 TK_OP_WS, /* 13 - CR */
260 TK_OP_ILLCHR, /* 14 - SO */
261 TK_OP_ILLCHR, /* 15 - SI */
262 TK_OP_ILLCHR, /* 16 - DLE */
263 TK_OP_ILLCHR, /* 17 - DC1 */
264 TK_OP_ILLCHR, /* 18 - DC2 */
265 TK_OP_ILLCHR, /* 19 - DC3 */
266 TK_OP_ILLCHR, /* 20 - DC4 */
267 TK_OP_ILLCHR, /* 21 - NAK */
268 TK_OP_ILLCHR, /* 22 - SYN */
269 TK_OP_ILLCHR, /* 23 - ETB */
270 TK_OP_ILLCHR, /* 24 - CAN */
271 TK_OP_ILLCHR, /* 25 - EM */
272 TK_OP_ILLCHR, /* 26 - SUB */
273 TK_OP_ILLCHR, /* 27 - ESC */
274 TK_OP_ILLCHR, /* 28 - FS */
275 TK_OP_ILLCHR, /* 29 - GS */
276 TK_OP_ILLCHR, /* 30 - RS */
277 TK_OP_ILLCHR, /* 31 - US */
278 TK_OP_WS, /* 32 - SP */
279 TK_OP_ID, /* 33 - ! */
280 TK_OP_SIMQUOTE, /* 34 - " */
281 TK_OP_CMT, /* 35 - # */
282 TK_OP_ID, /* 36 - $ */
283 TK_OP_ID, /* 37 - % */
284 TK_OP_ID, /* 38 - & */
285 TK_OP_ID, /* 39 - ' */
286 TK_OP_ID, /* 40 - ( */
287 TK_OP_ID, /* 41 - ) */
288 TK_OP_ID, /* 42 - * */
289 TK_OP_ID, /* 43 - + */
290 TK_OP_ID, /* 44 - , */
291 TK_DASH, /* 45 - - */
292 TK_OP_ID, /* 46 - . */
293 TK_OP_ID, /* 47 - / */
294 TK_OP_ID, /* 48 - 0 */
295 TK_OP_ID, /* 49 - 1 */
296 TK_OP_ID, /* 50 - 2 */
297 TK_OP_ID, /* 51 - 3 */
298 TK_OP_ID, /* 52 - 4 */
299 TK_OP_ID, /* 53 - 5 */
300 TK_OP_ID, /* 54 - 6 */
301 TK_OP_ID, /* 55 - 7 */
302 TK_OP_ID, /* 56 - 8 */
303 TK_OP_ID, /* 57 - 9 */
304 TK_COLON, /* 58 - : */
305 TK_SEMICOLON, /* 59 - ; */
306 TK_OP_ID, /* 60 - < */
307 TK_EQUAL, /* 61 - = */
308 TK_OP_ID, /* 62 - > */
309 TK_OP_ID, /* 63 - ? */
310 TK_ATSIGN, /* 64 - @ */
311 TK_OP_ID, /* 65 - A */
312 TK_OP_ID, /* 66 - B */
313 TK_OP_ID, /* 67 - C */
314 TK_OP_ID, /* 68 - D */
315 TK_OP_ID, /* 69 - E */
316 TK_OP_ID, /* 70 - F */
317 TK_OP_ID, /* 71 - G */
318 TK_OP_ID, /* 72 - H */
319 TK_OP_ID, /* 73 - I */
320 TK_OP_ID, /* 74 - J */
321 TK_OP_ID, /* 75 - K */
322 TK_OP_ID, /* 76 - L */
323 TK_OP_ID, /* 77 - M */
324 TK_OP_ID, /* 78 - N */
325 TK_OP_ID, /* 79 - O */
326 TK_OP_ID, /* 80 - P */
327 TK_OP_ID, /* 81 - Q */
328 TK_OP_ID, /* 82 - R */
329 TK_OP_ID, /* 83 - S */
330 TK_OP_ID, /* 84 - T */
331 TK_OP_ID, /* 85 - U */
332 TK_OP_ID, /* 86 - V */
333 TK_OP_ID, /* 87 - W */
334 TK_OP_ID, /* 88 - X */
335 TK_OP_ID, /* 89 - Y */
336 TK_OP_ID, /* 90 - Z */
337 TK_OP_ID, /* 91 - [ */
338 TK_OP_ID, /* 92 - \ */
339 TK_OP_ID, /* 93 - ] */
340 TK_OP_ID, /* 94 - ^ */
341 TK_OP_ID, /* 95 - _ */
342 TK_OP_ID, /* 96 - ` */
343 TK_OP_ID, /* 97 - a */
344 TK_OP_ID, /* 98 - b */
345 TK_OP_ID, /* 99 - c */
346 TK_OP_ID, /* 100 - d */
347 TK_OP_ID, /* 101 - e */
348 TK_OP_ID, /* 102 - f */
349 TK_OP_ID, /* 103 - g */
350 TK_OP_ID, /* 104 - h */
351 TK_OP_ID, /* 105 - i */
352 TK_OP_ID, /* 106 - j */
353 TK_OP_ID, /* 107 - k */
354 TK_OP_ID, /* 108 - l */
355 TK_OP_ID, /* 109 - m */
356 TK_OP_ID, /* 110 - n */
357 TK_OP_ID, /* 111 - o */
358 TK_OP_ID, /* 112 - p */
359 TK_OP_ID, /* 113 - q */
360 TK_OP_ID, /* 114 - r */
361 TK_OP_ID, /* 115 - s */
362 TK_OP_ID, /* 116 - t */
363 TK_OP_ID, /* 117 - u */
364 TK_OP_ID, /* 118 - v */
365 TK_OP_ID, /* 119 - w */
366 TK_OP_ID, /* 120 - x */
367 TK_OP_ID, /* 121 - y */
368 TK_OP_ID, /* 122 - z */
369 TK_LEFTBKT, /* 123 - { */
370 TK_PIPE, /* 124 - | */
371 TK_RIGHTBKT, /* 125 - } */
372 TK_OP_ID, /* 126 - ~ */
373 TK_OP_ILLCHR, /* 127 - DEL */
374 };
375
376 /*
377 * Version 2 syntax dispatch table for ld_map_gettoken(). For each of the
378 * 7-bit ASCII characters, determine how the lexical analyzer should behave.
379 *
380 * This table must be kept in sync with tkid_attr[] below.
381 *
382 * Identifier Note:
383 * We define a letter as being one of the character [A-Z], [a-z], or [_%/.]
384 * A digit is the numbers [0-9], or [$-]. An unquoted identifier is defined
385 * as a letter, followed by any number of letters or digits. This is a loosened
386 * version of the C definition of an identifier. The extra characters not
387 * allowed by C are common in section names and/or file paths.
388 */
389 static const mf_tokdisp_t gettok_dispatch_v2 = {
390 TK_OP_EOF, /* 0 - NUL */
391 TK_OP_ILLCHR, /* 1 - SOH */
392 TK_OP_ILLCHR, /* 2 - STX */
393 TK_OP_ILLCHR, /* 3 - ETX */
394 TK_OP_ILLCHR, /* 4 - EOT */
395 TK_OP_ILLCHR, /* 5 - ENQ */
396 TK_OP_ILLCHR, /* 6 - ACK */
397 TK_OP_ILLCHR, /* 7 - BEL */
398 TK_OP_ILLCHR, /* 8 - BS */
399 TK_OP_WS, /* 9 - HT */
400 TK_OP_NL, /* 10 - NL */
401 TK_OP_WS, /* 11 - VT */
402 TK_OP_WS, /* 12 - FF */
403 TK_OP_WS, /* 13 - CR */
404 TK_OP_ILLCHR, /* 14 - SO */
405 TK_OP_ILLCHR, /* 15 - SI */
406 TK_OP_ILLCHR, /* 16 - DLE */
407 TK_OP_ILLCHR, /* 17 - DC1 */
408 TK_OP_ILLCHR, /* 18 - DC2 */
409 TK_OP_ILLCHR, /* 19 - DC3 */
410 TK_OP_ILLCHR, /* 20 - DC4 */
411 TK_OP_ILLCHR, /* 21 - NAK */
412 TK_OP_ILLCHR, /* 22 - SYN */
413 TK_OP_ILLCHR, /* 23 - ETB */
414 TK_OP_ILLCHR, /* 24 - CAN */
415 TK_OP_ILLCHR, /* 25 - EM */
416 TK_OP_ILLCHR, /* 26 - SUB */
417 TK_OP_ILLCHR, /* 27 - ESC */
418 TK_OP_ILLCHR, /* 28 - FS */
419 TK_OP_ILLCHR, /* 29 - GS */
420 TK_OP_ILLCHR, /* 30 - RS */
421 TK_OP_ILLCHR, /* 31 - US */
422 TK_OP_WS, /* 32 - SP */
423 TK_BANG, /* 33 - ! */
424 TK_OP_CQUOTE, /* 34 - " */
425 TK_OP_CMT, /* 35 - # */
426 TK_OP_CDIR, /* 36 - $ */
427 TK_OP_ID, /* 37 - % */
428 TK_OP_BADCHR, /* 38 - & */
429 TK_OP_SIMQUOTE, /* 39 - ' */
430 TK_OP_BADCHR, /* 40 - ( */
431 TK_OP_BADCHR, /* 41 - ) */
432 TK_STAR, /* 42 - * */
433 TK_OP_CEQUAL, /* 43 - + */
434 TK_OP_BADCHR, /* 44 - , */
435 TK_OP_CEQUAL, /* 45 - - */
436 TK_OP_ID, /* 46 - . */
437 TK_OP_ID, /* 47 - / */
438 TK_OP_NUM, /* 48 - 0 */
439 TK_OP_NUM, /* 49 - 1 */
440 TK_OP_NUM, /* 50 - 2 */
441 TK_OP_NUM, /* 51 - 3 */
442 TK_OP_NUM, /* 52 - 4 */
443 TK_OP_NUM, /* 53 - 5 */
444 TK_OP_NUM, /* 54 - 6 */
445 TK_OP_NUM, /* 55 - 7 */
446 TK_OP_NUM, /* 56 - 8 */
447 TK_OP_NUM, /* 57 - 9 */
448 TK_COLON, /* 58 - : */
449 TK_SEMICOLON, /* 59 - ; */
450 TK_OP_BADCHR, /* 60 - < */
451 TK_EQUAL, /* 61 - = */
452 TK_OP_BADCHR, /* 62 - > */
453 TK_OP_BADCHR, /* 63 - ? */
454 TK_OP_BADCHR, /* 64 - @ */
455 TK_OP_ID, /* 65 - A */
456 TK_OP_ID, /* 66 - B */
457 TK_OP_ID, /* 67 - C */
458 TK_OP_ID, /* 68 - D */
459 TK_OP_ID, /* 69 - E */
460 TK_OP_ID, /* 70 - F */
461 TK_OP_ID, /* 71 - G */
462 TK_OP_ID, /* 72 - H */
463 TK_OP_ID, /* 73 - I */
464 TK_OP_ID, /* 74 - J */
465 TK_OP_ID, /* 75 - K */
466 TK_OP_ID, /* 76 - L */
467 TK_OP_ID, /* 77 - M */
468 TK_OP_ID, /* 78 - N */
469 TK_OP_ID, /* 79 - O */
470 TK_OP_ID, /* 80 - P */
471 TK_OP_ID, /* 81 - Q */
472 TK_OP_ID, /* 82 - R */
473 TK_OP_ID, /* 83 - S */
474 TK_OP_ID, /* 84 - T */
475 TK_OP_ID, /* 85 - U */
476 TK_OP_ID, /* 86 - V */
477 TK_OP_ID, /* 87 - W */
478 TK_OP_ID, /* 88 - X */
479 TK_OP_ID, /* 89 - Y */
480 TK_OP_ID, /* 90 - Z */
481 TK_LEFTSQR, /* 91 - [ */
482 TK_OP_BADCHR, /* 92 - \ */
483 TK_RIGHTSQR, /* 93 - ] */
484 TK_OP_BADCHR, /* 94 - ^ */
485 TK_OP_ID, /* 95 - _ */
486 TK_OP_BADCHR, /* 96 - ` */
487 TK_OP_ID, /* 97 - a */
488 TK_OP_ID, /* 98 - b */
489 TK_OP_ID, /* 99 - c */
490 TK_OP_ID, /* 100 - d */
491 TK_OP_ID, /* 101 - e */
492 TK_OP_ID, /* 102 - f */
493 TK_OP_ID, /* 103 - g */
494 TK_OP_ID, /* 104 - h */
495 TK_OP_ID, /* 105 - i */
496 TK_OP_ID, /* 106 - j */
497 TK_OP_ID, /* 107 - k */
498 TK_OP_ID, /* 108 - l */
499 TK_OP_ID, /* 109 - m */
500 TK_OP_ID, /* 110 - n */
501 TK_OP_ID, /* 111 - o */
502 TK_OP_ID, /* 112 - p */
503 TK_OP_ID, /* 113 - q */
504 TK_OP_ID, /* 114 - r */
505 TK_OP_ID, /* 115 - s */
506 TK_OP_ID, /* 116 - t */
507 TK_OP_ID, /* 117 - u */
508 TK_OP_ID, /* 118 - v */
509 TK_OP_ID, /* 119 - w */
510 TK_OP_ID, /* 120 - x */
511 TK_OP_ID, /* 121 - y */
512 TK_OP_ID, /* 122 - z */
513 TK_LEFTBKT, /* 123 - { */
514 TK_OP_BADCHR, /* 124 - | */
515 TK_RIGHTBKT, /* 125 - } */
516 TK_OP_BADCHR, /* 126 - ~ */
517 TK_OP_ILLCHR, /* 127 - DEL */
518 };
519
520
521 /*
522 * Table used to identify unquoted identifiers. Each element of this array
523 * contains a bitmask indicating whether the character it represents starts,
524 * or continues an identifier, for each supported mapfile syntax version.
525 */
526 static const char tkid_attr[128] = {
527 0, /* 0 - NUL */
528 TKID_ATTR_CONT(1), /* 1 - SOH */
529 TKID_ATTR_CONT(1), /* 2 - STX */
530 TKID_ATTR_CONT(1), /* 3 - ETX */
531 TKID_ATTR_CONT(1), /* 4 - EOT */
532 TKID_ATTR_CONT(1), /* 5 - ENQ */
533 TKID_ATTR_CONT(1), /* 6 - ACK */
534 TKID_ATTR_CONT(1), /* 7 - BEL */
535 TKID_ATTR_CONT(1), /* 8 - BS */
536 0, /* 9 - HT */
537 0, /* 10 - NL */
538 TKID_ATTR_CONT(1), /* 11 - VT */
539 TKID_ATTR_CONT(1), /* 12 - FF */
540 TKID_ATTR_CONT(1), /* 13 - CR */
541 TKID_ATTR_CONT(1), /* 14 - SO */
542 TKID_ATTR_CONT(1), /* 15 - SI */
543 TKID_ATTR_CONT(1), /* 16 - DLE */
544 TKID_ATTR_CONT(1), /* 17 - DC1 */
545 TKID_ATTR_CONT(1), /* 18 - DC2 */
546 TKID_ATTR_CONT(1), /* 19 - DC3 */
547 TKID_ATTR_CONT(1), /* 20 - DC4 */
548 TKID_ATTR_CONT(1), /* 21 - NAK */
549 TKID_ATTR_CONT(1), /* 22 - SYN */
550 TKID_ATTR_CONT(1), /* 23 - ETB */
551 TKID_ATTR_CONT(1), /* 24 - CAN */
552 TKID_ATTR_CONT(1), /* 25 - EM */
553 TKID_ATTR_CONT(1), /* 26 - SUB */
554 TKID_ATTR_CONT(1), /* 27 - ESC */
555 TKID_ATTR_CONT(1), /* 28 - FS */
556 TKID_ATTR_CONT(1), /* 29 - GS */
557 TKID_ATTR_CONT(1), /* 30 - RS */
558 TKID_ATTR_CONT(1), /* 31 - US */
559 0, /* 32 - SP */
560 TKID_ATTR(1), /* 33 - ! */
561 0, /* 34 - " */
562 0, /* 35 - # */
563 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 36 - $ */
564 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 37 - % */
565 TKID_ATTR(1), /* 38 - & */
566 TKID_ATTR(1), /* 39 - ' */
567 TKID_ATTR(1), /* 40 - ( */
568 TKID_ATTR(1), /* 41 - ) */
569 TKID_ATTR(1), /* 42 - * */
570 TKID_ATTR(1), /* 43 - + */
571 TKID_ATTR(1), /* 44 - , */
572 TKID_ATTR_CONT(1) | TKID_ATTR_CONT(2), /* 45 - - */
573 TKID_ATTR(1) | TKID_ATTR(2), /* 46 - . */
574 TKID_ATTR(1) | TKID_ATTR(2), /* 47 - / */
575 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 48 - 0 */
576 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 49 - 1 */
577 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 50 - 2 */
578 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 51 - 3 */
579 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 52 - 4 */
580 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 53 - 5 */
581 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 54 - 6 */
582 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 55 - 7 */
583 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 56 - 8 */
584 TKID_ATTR(1) | TKID_ATTR_CONT(2), /* 57 - 9 */
585 0, /* 58 - : */
586 0, /* 59 - ; */
587 TKID_ATTR(1), /* 60 - < */
588 0, /* 61 - = */
589 TKID_ATTR(1), /* 62 - > */
590 TKID_ATTR(1), /* 63 - ? */
591 TKID_ATTR_CONT(1), /* 64 - @ */
592 TKID_ATTR(1) | TKID_ATTR(2), /* 65 - A */
593 TKID_ATTR(1) | TKID_ATTR(2), /* 66 - B */
594 TKID_ATTR(1) | TKID_ATTR(2), /* 67 - C */
595 TKID_ATTR(1) | TKID_ATTR(2), /* 68 - D */
596 TKID_ATTR(1) | TKID_ATTR(2), /* 69 - E */
597 TKID_ATTR(1) | TKID_ATTR(2), /* 70 - F */
598 TKID_ATTR(1) | TKID_ATTR(2), /* 71 - G */
599 TKID_ATTR(1) | TKID_ATTR(2), /* 72 - H */
600 TKID_ATTR(1) | TKID_ATTR(2), /* 73 - I */
601 TKID_ATTR(1) | TKID_ATTR(2), /* 74 - J */
602 TKID_ATTR(1) | TKID_ATTR(2), /* 75 - K */
603 TKID_ATTR(1) | TKID_ATTR(2), /* 76 - L */
604 TKID_ATTR(1) | TKID_ATTR(2), /* 77 - M */
605 TKID_ATTR(1) | TKID_ATTR(2), /* 78 - N */
606 TKID_ATTR(1) | TKID_ATTR(2), /* 79 - O */
607 TKID_ATTR(1) | TKID_ATTR(2), /* 80 - P */
608 TKID_ATTR(1) | TKID_ATTR(2), /* 81 - Q */
609 TKID_ATTR(1) | TKID_ATTR(2), /* 82 - R */
610 TKID_ATTR(1) | TKID_ATTR(2), /* 83 - S */
611 TKID_ATTR(1) | TKID_ATTR(2), /* 84 - T */
612 TKID_ATTR(1) | TKID_ATTR(2), /* 85 - U */
613 TKID_ATTR(1) | TKID_ATTR(2), /* 86 - V */
614 TKID_ATTR(1) | TKID_ATTR(2), /* 87 - W */
615 TKID_ATTR(1) | TKID_ATTR(2), /* 88 - X */
616 TKID_ATTR(1) | TKID_ATTR(2), /* 89 - Y */
617 TKID_ATTR(1) | TKID_ATTR(2), /* 90 - Z */
618 TKID_ATTR(1), /* 91 - [ */
619 TKID_ATTR(1), /* 92 - \ */
620 TKID_ATTR(1), /* 93 - ] */
621 TKID_ATTR(1), /* 94 - ^ */
622 TKID_ATTR(1) | TKID_ATTR(2), /* 95 - _ */
623 TKID_ATTR(1), /* 96 - ` */
624 TKID_ATTR(1) | TKID_ATTR(2), /* 97 - a */
625 TKID_ATTR(1) | TKID_ATTR(2), /* 98 - b */
626 TKID_ATTR(1) | TKID_ATTR(2), /* 99 - c */
627 TKID_ATTR(1) | TKID_ATTR(2), /* 100 - d */
628 TKID_ATTR(1) | TKID_ATTR(2), /* 101 - e */
629 TKID_ATTR(1) | TKID_ATTR(2), /* 102 - f */
630 TKID_ATTR(1) | TKID_ATTR(2), /* 103 - g */
631 TKID_ATTR(1) | TKID_ATTR(2), /* 104 - h */
632 TKID_ATTR(1) | TKID_ATTR(2), /* 105 - i */
633 TKID_ATTR(1) | TKID_ATTR(2), /* 106 - j */
634 TKID_ATTR(1) | TKID_ATTR(2), /* 107 - k */
635 TKID_ATTR(1) | TKID_ATTR(2), /* 108 - l */
636 TKID_ATTR(1) | TKID_ATTR(2), /* 109 - m */
637 TKID_ATTR(1) | TKID_ATTR(2), /* 110 - n */
638 TKID_ATTR(1) | TKID_ATTR(2), /* 111 - o */
639 TKID_ATTR(1) | TKID_ATTR(2), /* 112 - p */
640 TKID_ATTR(1) | TKID_ATTR(2), /* 113 - q */
641 TKID_ATTR(1) | TKID_ATTR(2), /* 114 - r */
642 TKID_ATTR(1) | TKID_ATTR(2), /* 115 - s */
643 TKID_ATTR(1) | TKID_ATTR(2), /* 116 - t */
644 TKID_ATTR(1) | TKID_ATTR(2), /* 117 - u */
645 TKID_ATTR(1) | TKID_ATTR(2), /* 118 - v */
646 TKID_ATTR(1) | TKID_ATTR(2), /* 119 - w */
647 TKID_ATTR(1) | TKID_ATTR(2), /* 120 - x */
648 TKID_ATTR(1) | TKID_ATTR(2), /* 121 - y */
649 TKID_ATTR(1) | TKID_ATTR(2), /* 122 - z */
650 TKID_ATTR_CONT(1), /* 123 - { */
651 TKID_ATTR_CONT(1), /* 124 - | */
652 TKID_ATTR_CONT(1), /* 125 - } */
653 TKID_ATTR(1), /* 126 - ~ */
654 TKID_ATTR_CONT(1), /* 127 - DEL */
655 };
656
657
/*
 * Move *str forward to the first newline at or after its current
 * position, or to the terminating NUL if the string has no newline.
 */
inline static void
advance_to_eol(char **str)
{
	char	*cur;

	for (cur = *str; *cur != '\0'; cur++) {
		if (*cur == '\n')
			break;
	}
	*str = cur;
}
671
672 /*
673 * Insert a NULL patch at the given address
674 */
675 inline static void
null_patch_set(char * str,ld_map_npatch_t * np)676 null_patch_set(char *str, ld_map_npatch_t *np)
677 {
678 np->np_ptr = str;
679 np->np_ch = *str;
680 *str = '\0';
681 }
682
683 /*
684 * Undo a NULL patch
685 */
686 inline static void
null_patch_undo(ld_map_npatch_t * np)687 null_patch_undo(ld_map_npatch_t *np)
688 {
689 *np->np_ptr = np->np_ch;
690 }
691
692 /*
693 * Insert a NULL patch at the end of the line containing str.
694 */
695 static void
null_patch_eol(char * str,ld_map_npatch_t * np)696 null_patch_eol(char *str, ld_map_npatch_t *np)
697 {
698 advance_to_eol(&str);
699 null_patch_set(str, np);
700 }
701
702 /*
703 * Locate the end of an unquoted identifier.
704 *
705 * entry:
706 * mf - Mapfile descriptor, positioned to first character
707 * of identifier.
708 *
709 * exit:
710 * If the item pointed at by mf is not an identifier, returns NULL.
711 * Otherwise, returns pointer to character after the last character
712 * of the identifier.
713 */
714 inline static char *
ident_delimit(Mapfile * mf)715 ident_delimit(Mapfile *mf)
716 {
717 char *str = mf->mf_next;
718 ld_map_npatch_t np;
719 int c = *str++;
720
721 /* If not a valid start character, report the error */
722 if ((c & 0x80) || !(tkid_attr[c] & mf->mf_tkid_start)) {
723 null_patch_set(str, &np);
724 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), str);
725 null_patch_undo(&np);
726 return (NULL);
727 }
728
729 /* Keep going until we hit a non-continuing character */
730 for (c = *str; !(c & 0x80) && (tkid_attr[c] & mf->mf_tkid_cont);
731 c = *++str)
732 ;
733
734 return (str);
735 }
736
737 /*
738 * Allocate memory for a stack.
739 *
740 * entry:
741 * stack - Pointer to stack for which memory is required, cast
742 * to the generic stack type.
743 * n_default - Size to use for initial allocation.
744 * elt_size - sizeof(elt), where elt is the actual stack data type.
745 *
746 * exit:
747 * Returns (1) on success. On error (memory allocation), a message
748 * is printed and False (0) is returned.
749 *
750 * note:
751 * The caller casts the pointer to their actual datatype-specific stack
752 * to be a (generic_stack_t *). The C language will give all stack
753 * structs the same size and layout as long as the underlying platform
754 * uses a single integral type for pointers. Hence, this cast is safe,
755 * and lets a generic routine modify data-specific types without being
756 * aware of those types.
757 */
758 static Boolean
stack_resize(generic_stack_t * stack,size_t n_default,size_t elt_size)759 stack_resize(generic_stack_t *stack, size_t n_default, size_t elt_size)
760 {
761 size_t new_n_alloc;
762 void *newaddr;
763
764 /* Use initial size first, and double the allocation on each call */
765 new_n_alloc = (stack->stk_n_alloc == 0) ?
766 n_default : (stack->stk_n_alloc * 2);
767
768 newaddr = libld_realloc(stack->stk_s, new_n_alloc * elt_size);
769 if (newaddr == NULL)
770 return (FALSE);
771
772 stack->stk_s = newaddr;
773 stack->stk_n_alloc = new_n_alloc;
774 return (TRUE);
775 }
776
777 /*
778 * AVL comparison function for cexp_id_node_t items.
779 *
780 * entry:
781 * n1, n2 - pointers to nodes to be compared
782 *
783 * exit:
784 * Returns -1 if (n1 < n2), 0 if they are equal, and 1 if (n1 > n2)
785 */
786 static int
cexp_ident_cmp(const void * n1,const void * n2)787 cexp_ident_cmp(const void *n1, const void *n2)
788 {
789 int rc;
790
791 rc = strcmp(((cexp_id_node_t *)n1)->ceid_name,
792 ((cexp_id_node_t *)n2)->ceid_name);
793
794 if (rc > 0)
795 return (1);
796 if (rc < 0)
797 return (-1);
798 return (0);
799 }
800
801
802 /*
803 * Returns True (1) if name is in the conditional expression identifier
804 * AVL tree, and False (0) otherwise.
805 */
806 static int
cexp_ident_test(const char * name)807 cexp_ident_test(const char *name)
808 {
809 cexp_id_node_t node;
810
811 node.ceid_name = name;
812 return (avl_find(lms.lms_cexp_id, &node, 0) != NULL);
813 }
814
815 /*
816 * Add a new boolean identifier to the conditional expression identifier
817 * AVL tree.
818 *
819 * entry:
820 * mf - If non-NULL, the mapfile descriptor for the mapfile
821 * containing the $add directive. NULL if this is an
822 * initialization call.
823 * name - Name of identifier. Must point at stable storage that will
824 * not be moved or modified by the caller following this call.
825 *
826 * exit:
827 * On success, True (1) is returned and name has been entered.
828 * On failure, False (0) is returned and an error has been printed.
829 */
830 static int
cexp_ident_add(Mapfile * mf,const char * name)831 cexp_ident_add(Mapfile *mf, const char *name)
832 {
833 cexp_id_node_t *node;
834
835 if (mf != NULL) {
836 DBG_CALL(Dbg_map_cexp_id(mf->mf_ofl->ofl_lml, 1,
837 mf->mf_name, mf->mf_lineno, name));
838
839 /* If is already known, don't do it again */
840 if (cexp_ident_test(name))
841 return (1);
842 }
843
844 if ((node = libld_calloc(1, sizeof (*node))) == NULL)
845 return (0);
846 node->ceid_name = name;
847 avl_add(lms.lms_cexp_id, node);
848 return (1);
849 }
850
851 /*
852 * Remove a boolean identifier from the conditional expression identifier
853 * AVL tree.
854 *
855 * entry:
856 * mf - Mapfile descriptor
857 * name - Name of identifier.
858 *
859 * exit:
860 * If the name was in the tree, it has been removed. If not,
861 * then this routine quietly returns.
862 */
863 static void
cexp_ident_clear(Mapfile * mf,const char * name)864 cexp_ident_clear(Mapfile *mf, const char *name)
865 {
866 cexp_id_node_t node;
867 cexp_id_node_t *real_node;
868
869 DBG_CALL(Dbg_map_cexp_id(mf->mf_ofl->ofl_lml, 0,
870 mf->mf_name, mf->mf_lineno, name));
871
872 node.ceid_name = name;
873 real_node = avl_find(lms.lms_cexp_id, &node, 0);
874 if (real_node != NULL)
875 avl_remove(lms.lms_cexp_id, real_node);
876 }
877
878 /*
879 * Initialize the AVL tree that holds the names of the currently defined
880 * boolean identifiers for conditional expressions ($if/$elif).
881 *
882 * entry:
883 * ofl - Output file descriptor
884 *
885 * exit:
886 * On success, TRUE (1) is returned and lms.lms_cexp_id is ready for use.
887 * On failure, FALSE (0) is returned.
888 */
889 static Boolean
cexp_ident_init(void)890 cexp_ident_init(void)
891 {
892 /* If already done, use it */
893 if (lms.lms_cexp_id != NULL)
894 return (TRUE);
895
896 lms.lms_cexp_id = libld_calloc(1, sizeof (*lms.lms_cexp_id));
897 if (lms.lms_cexp_id == NULL)
898 return (FALSE);
899 avl_create(lms.lms_cexp_id, cexp_ident_cmp, sizeof (cexp_id_node_t),
900 SGSOFFSETOF(cexp_id_node_t, ceid_avlnode));
901
902
903 /* ELFCLASS */
904 if (cexp_ident_add(NULL, (ld_targ.t_m.m_class == ELFCLASS32) ?
905 MSG_ORIG(MSG_STR_UELF32) : MSG_ORIG(MSG_STR_UELF64)) == 0)
906 return (FALSE);
907
908 /* Machine */
909 switch (ld_targ.t_m.m_mach) {
910 case EM_386:
911 case EM_AMD64:
912 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_UX86)) == 0)
913 return (FALSE);
914 break;
915
916 case EM_SPARC:
917 case EM_SPARCV9:
918 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_USPARC)) == 0)
919 return (FALSE);
920 break;
921 }
922
923 /* true is always defined */
924 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_TRUE)) == 0)
925 return (FALSE);
926
927 return (TRUE);
928 }
929
930 /*
931 * Validate the string starting at mf->mf_next as being a
932 * boolean conditional expression identifier.
933 *
934 * entry:
935 * mf - Mapfile descriptor
936 * len - NULL, or address of variable to receive strlen() of identifier
937 * directive - If (len == NULL), string giving name of directive being
938 * processed. Ignored if (len != NULL).
939 *
940 * exit:
941 * On success:
942 * - If len is NULL, a NULL is inserted following the final
943 * character of the identifier, and the remainder of the string
944 * is tested to ensure it is empty, or only contains whitespace.
945 * - If len is non-NULL, *len is set to the number of characters
946 * in the identifier, and the rest of the string is not modified.
947 * - TRUE (1) is returned
948 *
949 * On failure, returns FALSE (0).
950 */
951 static Boolean
cexp_ident_validate(Mapfile * mf,size_t * len,const char * directive)952 cexp_ident_validate(Mapfile *mf, size_t *len, const char *directive)
953 {
954 char *tail;
955
956 if ((tail = ident_delimit(mf)) == NULL)
957 return (FALSE);
958
959 /*
960 * If len is non-NULL, we simple count the number of characters
961 * consumed by the identifier and are done. If len is NULL, then
962 * ensure there's nothing left but whitespace, and NULL terminate
963 * the identifier to remove it.
964 */
965 if (len != NULL) {
966 *len = tail - mf->mf_next;
967 } else if (*tail != '\0') {
968 *tail++ = '\0';
969 while (isspace(*tail))
970 tail++;
971 if (*tail != '\0') {
972 mf_fatal(mf, MSG_INTL(MSG_MAP_BADEXTRA), directive);
973 return (FALSE);
974 }
975 }
976
977 return (TRUE);
978 }
979
980 /*
981 * Push a new operator onto the conditional expression operator stack.
982 *
983 * entry:
984 * mf - Mapfile descriptor
985 * op - Operator to push
986 *
987 * exit:
988 * On success, TRUE (1) is returned, otherwise FALSE (0).
989 */
990 static Boolean
cexp_push_op(cexp_op_t op)991 cexp_push_op(cexp_op_t op)
992 {
993 if (STACK_RESERVE(lms.lms_cexp_op_stack, CEXP_OP_STACK_INIT) == 0)
994 return (FALSE);
995
996 STACK_PUSH(lms.lms_cexp_op_stack) = op;
997 return (TRUE);
998 }
999
1000 /*
1001 * Evaluate the basic operator (non-paren) at the top of lms.lms_cexp_op_stack,
1002 * and push the results on lms.lms_cexp_val_stack.
1003 *
1004 * exit:
1005 * On success, returns TRUE (1). On error, FALSE (0) is returned,
1006 * and the caller is responsible for issuing the error.
1007 */
1008 static Boolean
cexp_eval_op(void)1009 cexp_eval_op(void)
1010 {
1011 cexp_op_t op;
1012 uchar_t val;
1013
1014 op = STACK_POP(lms.lms_cexp_op_stack);
1015 switch (op) {
1016 case CEXP_OP_AND:
1017 if (lms.lms_cexp_val_stack.stk_n < 2)
1018 return (FALSE);
1019 val = STACK_POP(lms.lms_cexp_val_stack);
1020 STACK_TOP(lms.lms_cexp_val_stack) = val &&
1021 STACK_TOP(lms.lms_cexp_val_stack);
1022 break;
1023
1024 case CEXP_OP_OR:
1025 if (lms.lms_cexp_val_stack.stk_n < 2)
1026 return (FALSE);
1027 val = STACK_POP(lms.lms_cexp_val_stack);
1028 STACK_TOP(lms.lms_cexp_val_stack) = val ||
1029 STACK_TOP(lms.lms_cexp_val_stack);
1030 break;
1031
1032 case CEXP_OP_NEG:
1033 if (lms.lms_cexp_val_stack.stk_n < 1)
1034 return (FALSE);
1035 STACK_TOP(lms.lms_cexp_val_stack) =
1036 !STACK_TOP(lms.lms_cexp_val_stack);
1037 break;
1038 default:
1039 return (FALSE);
1040 }
1041
1042 return (TRUE);
1043 }
1044
1045 /*
1046 * Evaluate an expression for a $if/$elif control directive.
1047 *
1048 * entry:
1049 * mf - Mapfile descriptor for NULL terminated string
1050 * containing the expression.
1051 *
1052 * exit:
1053 * The contents of str are modified by this routine.
1054 * One of the following values are returned:
1055 * -1 Syntax error encountered (an error is printed)
1056 * 0 The expression evaluates to False
1057 * 1 The expression evaluates to True.
1058 *
1059 * note:
1060 * A simplified version of Dijkstra's Shunting Yard algorithm is used
1061 * to convert this syntax into postfix form and then evaluate it.
1062 * Our version has no functions and a tiny set of operators.
1063 *
1064 * The expressions consist of boolean identifiers, which can be
1065 * combined using the following operators, listed from highest
1066 * precedence to least:
1067 *
1068 * Operator Meaning
1069 * -------------------------------------------------
1070 * (expr) sub-expression, non-associative
1071 * ! logical negation, prefix, left associative
1072 * && || logical and/or, binary, left associative
1073 *
1074 * The operands manipulated by these operators are names, consisting of
1075 * a sequence of letters and digits. The first character must be a letter.
1076 * Underscore (_) and period (.) are also considered to be characters.
1077 * An operand is considered True if it is found in our set of known
1078 * names (lms.lms_cexp_id), and False otherwise.
1079 *
1080 * The Shunting Yard algorithm works using two stacks, one for operators,
1081 * and a second for operands. The infix input expression is tokenized from
1082 * left to right and processed in order. Issues of associativity and
1083 * precedence are managed by reducing (poping and evaluating) items with
1084 * higer precedence before pushing additional tokens with lower precedence.
1085 */
1086 static int
cexp_eval_expr(Mapfile * mf)1087 cexp_eval_expr(Mapfile *mf)
1088 {
1089 char *ident;
1090 size_t len;
1091 cexp_op_t new_op = CEXP_OP_AND; /* to catch binop at start */
1092 ld_map_npatch_t np;
1093 char *str = mf->mf_next;
1094
1095 STACK_RESET(lms.lms_cexp_op_stack);
1096 STACK_RESET(lms.lms_cexp_val_stack);
1097
1098 for (; *str; str++) {
1099
1100 /* Skip whitespace */
1101 while (isspace(*str))
1102 str++;
1103 if (!*str)
1104 break;
1105
1106 switch (*str) {
1107 case '&':
1108 case '|':
1109 if (*(str + 1) != *str)
1110 goto token_error;
1111 if ((new_op != CEXP_OP_NONE) &&
1112 (new_op != CEXP_OP_CPAR)) {
1113 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_BADOPUSE));
1114 return (-1);
1115 }
1116 str++;
1117
1118 /*
1119 * As this is a left associative binary operator, we
1120 * need to process all operators of equal or higher
1121 * precedence before pushing the new operator.
1122 */
1123 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack)) {
1124 cexp_op_t op = STACK_TOP(lms.lms_cexp_op_stack);
1125
1126
1127 if ((op != CEXP_OP_AND) && (op != CEXP_OP_OR) &&
1128 (op != CEXP_OP_NEG))
1129 break;
1130
1131 if (!cexp_eval_op())
1132 goto semantic_error;
1133 }
1134
1135 new_op = (*str == '&') ? CEXP_OP_AND : CEXP_OP_OR;
1136 if (!cexp_push_op(new_op))
1137 return (-1);
1138 break;
1139
1140 case '!':
1141 new_op = CEXP_OP_NEG;
1142 if (!cexp_push_op(new_op))
1143 return (-1);
1144 break;
1145
1146 case '(':
1147 new_op = CEXP_OP_OPAR;
1148 if (!cexp_push_op(new_op))
1149 return (-1);
1150 break;
1151
1152 case ')':
1153 new_op = CEXP_OP_CPAR;
1154
1155 /* Evaluate the operator stack until reach '(' */
1156 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack) &&
1157 (STACK_TOP(lms.lms_cexp_op_stack) != CEXP_OP_OPAR))
1158 if (!cexp_eval_op())
1159 goto semantic_error;
1160
1161 /*
1162 * If the top of operator stack is not an open paren,
1163 * when we have an error. In this case, the operator
1164 * stack will be empty due to the loop above.
1165 */
1166 if (STACK_IS_EMPTY(lms.lms_cexp_op_stack))
1167 goto unbalpar_error;
1168 lms.lms_cexp_op_stack.stk_n--; /* Pop OPAR */
1169 break;
1170
1171 default:
1172 /* Ensure there's room to push another operand */
1173 if (STACK_RESERVE(lms.lms_cexp_val_stack,
1174 CEXP_VAL_STACK_INIT) == 0)
1175 return (0);
1176 new_op = CEXP_OP_NONE;
1177
1178 /*
1179 * Operands cannot be numbers. However, we accept two
1180 * special cases: '0' means false, and '1' is true.
1181 * This is done to support the common C idiom of
1182 * '#if 1' and '#if 0' to conditionalize code under
1183 * development.
1184 */
1185 if ((*str == '0') || (*str == '1')) {
1186 STACK_PUSH(lms.lms_cexp_val_stack) =
1187 (*str == '1');
1188 break;
1189 }
1190
1191 /* Look up the identifier */
1192 ident = mf->mf_next = str;
1193 if (!cexp_ident_validate(mf, &len, NULL))
1194 return (-1);
1195 str += len - 1; /* loop will advance past final ch */
1196 null_patch_set(&ident[len], &np);
1197 STACK_PUSH(lms.lms_cexp_val_stack) =
1198 cexp_ident_test(ident);
1199 null_patch_undo(&np);
1200
1201 break;
1202 }
1203 }
1204
1205 /* Evaluate the operator stack until empty */
1206 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack)) {
1207 if (STACK_TOP(lms.lms_cexp_op_stack) == CEXP_OP_OPAR)
1208 goto unbalpar_error;
1209
1210 if (!cexp_eval_op())
1211 goto semantic_error;
1212 }
1213
1214 /* There should be exactly one value left */
1215 if (lms.lms_cexp_val_stack.stk_n != 1)
1216 goto semantic_error;
1217
1218 /* Final value is the result */
1219 return (lms.lms_cexp_val_stack.stk_s[0]);
1220
1221 /* Errors issued more than once are handled below, accessed via goto */
1222
1223 token_error: /* unexpected characters in input stream */
1224 mf_fatal(mf, MSG_INTL(MSG_MAP_CEXP_TOKERR), str);
1225 return (-1);
1226
1227 semantic_error: /* valid tokens, but in invalid arrangement */
1228 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_SEMERR));
1229 return (-1);
1230
1231 unbalpar_error: /* Extra or missing parenthesis */
1232 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_UNBALPAR));
1233 return (-1);
1234 }
1235
1236 /*
1237 * Process a mapfile control directive. These directives start with
1238 * the dollar character, and are used to manage details of the mapfile
1239 * itself, such as version and conditional input.
1240 *
1241 * entry:
1242 * mf - Mapfile descriptor
1243 *
1244 * exit:
1245 * Returns TRUE (1) for success, and FALSE (0) on error. In the
1246 * error case, a descriptive error is issued.
1247 */
1248 static Boolean
cdir_process(Mapfile * mf)1249 cdir_process(Mapfile *mf)
1250 {
1251 typedef enum { /* Directive types */
1252 CDIR_T_UNKNOWN = 0, /* Unrecognized control directive */
1253 CDIR_T_ADD, /* $add */
1254 CDIR_T_CLEAR, /* $clear */
1255 CDIR_T_ERROR, /* $error */
1256 CDIR_T_VERSION, /* $mapfile_version */
1257 CDIR_T_IF, /* $if */
1258 CDIR_T_ELIF, /* $elif */
1259 CDIR_T_ELSE, /* $else */
1260 CDIR_T_ENDIF, /* $endif */
1261 } cdir_t;
1262
1263 typedef enum { /* Types of arguments accepted by directives */
1264 ARG_T_NONE, /* Directive takes no arguments */
1265 ARG_T_EXPR, /* Directive takes a conditional expression */
1266 ARG_T_ID, /* Conditional expression identifier */
1267 ARG_T_STR, /* Non-empty string */
1268 ARG_T_IGN /* Ignore the argument */
1269 } cdir_arg_t;
1270
1271 typedef struct {
1272 const char *md_name; /* Directive name */
1273 size_t md_size; /* strlen(md_name) */
1274 cdir_arg_t md_arg; /* Type of arguments */
1275 cdir_t md_op; /* CDIR_T_ code */
1276 } cdir_match_t;
1277
1278 /* Control Directives: The most likely items are listed first */
1279 static cdir_match_t match_data[] = {
1280 { MSG_ORIG(MSG_STR_CDIR_IF), MSG_STR_CDIR_IF_SIZE,
1281 ARG_T_EXPR, CDIR_T_IF },
1282 { MSG_ORIG(MSG_STR_CDIR_ENDIF), MSG_STR_CDIR_ENDIF_SIZE,
1283 ARG_T_NONE, CDIR_T_ENDIF },
1284 { MSG_ORIG(MSG_STR_CDIR_ELSE), MSG_STR_CDIR_ELSE_SIZE,
1285 ARG_T_NONE, CDIR_T_ELSE },
1286 { MSG_ORIG(MSG_STR_CDIR_ELIF), MSG_STR_CDIR_ELIF_SIZE,
1287 ARG_T_EXPR, CDIR_T_ELIF },
1288 { MSG_ORIG(MSG_STR_CDIR_ERROR), MSG_STR_CDIR_ERROR_SIZE,
1289 ARG_T_STR, CDIR_T_ERROR },
1290 { MSG_ORIG(MSG_STR_CDIR_ADD), MSG_STR_CDIR_ADD_SIZE,
1291 ARG_T_ID, CDIR_T_ADD },
1292 { MSG_ORIG(MSG_STR_CDIR_CLEAR), MSG_STR_CDIR_CLEAR_SIZE,
1293 ARG_T_ID, CDIR_T_CLEAR },
1294 { MSG_ORIG(MSG_STR_CDIR_MFVER), MSG_STR_CDIR_MFVER_SIZE,
1295 ARG_T_IGN, CDIR_T_VERSION },
1296
1297 { NULL, 0,
1298 ARG_T_IGN, CDIR_T_UNKNOWN }
1299 };
1300
1301 cdir_match_t *mdptr;
1302 char *tail;
1303 int expr_eval; /* Result of evaluating ARG_T_EXPR */
1304 Mapfile arg_mf;
1305 cdir_level_t *level;
1306 int pass, parent_pass; /* Currently accepting input */
1307
1308 restart:
1309 /* Is the immediate context passing input? */
1310 pass = STACK_IS_EMPTY(lms.lms_cdir_stack) ||
1311 STACK_TOP(lms.lms_cdir_stack).cdl_pass;
1312
1313 /* Is the surrounding (parent) context passing input? */
1314 parent_pass = (lms.lms_cdir_stack.stk_n <= 1) ||
1315 lms.lms_cdir_stack.stk_s[lms.lms_cdir_stack.stk_n - 2].cdl_pass;
1316
1317
1318 for (mdptr = match_data; mdptr->md_name; mdptr++) {
1319 /* Prefix must match, or we move on */
1320 if (strncmp(mf->mf_next, mdptr->md_name,
1321 mdptr->md_size) != 0)
1322 continue;
1323 tail = mf->mf_next + mdptr->md_size;
1324
1325 /*
1326 * If there isn't whitespace, or a NULL terminator following
1327 * the prefix, then even though our prefix matched, the actual
1328 * token is longer, and we don't have a match.
1329 */
1330 if (!isspace(*tail) && (*tail != '\0'))
1331 continue;
1332
1333 /* We have matched a valid control directive */
1334 break;
1335 }
1336
1337 /* Advance input to end of the current line */
1338 advance_to_eol(&mf->mf_next);
1339
1340 /*
1341 * Set up a temporary mapfile descriptor to reference the
1342 * argument string. The benefit of this second block, is that
1343 * we can advance the real one to the next line now, which allows
1344 * us to return at any time knowing that the input has been moved
1345 * to the proper spot. This simplifies the error cases.
1346 *
1347 * If we had a match, tail points at the start of the string.
1348 * Otherwise, we want to point at the end of the line.
1349 */
1350 arg_mf = *mf;
1351 if (mdptr->md_name == NULL)
1352 arg_mf.mf_text = arg_mf.mf_next;
1353 else
1354 arg_mf.mf_text = arg_mf.mf_next = tail;
1355
1356 /*
1357 * Null terminate the arguments, and advance the main mapfile
1358 * state block to the next line.
1359 */
1360 if (*mf->mf_next == '\n') {
1361 *mf->mf_next++ = '\0';
1362 mf->mf_lineno++;
1363 }
1364
1365 /* Skip leading whitespace to arguments */
1366 while (isspace(*arg_mf.mf_next))
1367 arg_mf.mf_next++;
1368
1369 /* Strip off any comment present on the line */
1370 for (tail = arg_mf.mf_next; *tail; tail++)
1371 if (*tail == '#') {
1372 *tail = '\0';
1373 break;
1374 }
1375
1376 /*
1377 * Process the arguments as necessary depending on their type.
1378 * If this control directive is nested inside a surrounding context
1379 * that is not currently passing text, then we skip the argument
1380 * evaluation. This follows the behavior of the C preprocessor,
1381 * which only examines enough to detect the operation within
1382 * a disabled section, without issuing errors about the arguments.
1383 */
1384 if (pass || (parent_pass && (mdptr->md_op == CDIR_T_ELIF))) {
1385 switch (mdptr->md_arg) {
1386 case ARG_T_NONE:
1387 if (*arg_mf.mf_next == '\0')
1388 break;
1389 /* Args are present, but not wanted */
1390 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REQNOARG),
1391 mdptr->md_name);
1392 return (FALSE);
1393
1394 case ARG_T_EXPR:
1395 /* Ensure that arguments are present */
1396 if (*arg_mf.mf_next == '\0')
1397 goto error_reqarg;
1398 expr_eval = cexp_eval_expr(&arg_mf);
1399 if (expr_eval == -1)
1400 return (FALSE);
1401 break;
1402
1403 case ARG_T_ID:
1404 /* Ensure that arguments are present */
1405 if (*arg_mf.mf_next == '\0')
1406 goto error_reqarg;
1407 if (!cexp_ident_validate(&arg_mf, NULL,
1408 mdptr->md_name))
1409 return (FALSE);
1410 break;
1411
1412 case ARG_T_STR:
1413 /* Ensure that arguments are present */
1414 if (*arg_mf.mf_next == '\0')
1415 goto error_reqarg;
1416 /* Remove trailing whitespace */
1417 tail = arg_mf.mf_next + strlen(arg_mf.mf_next);
1418 while ((tail > arg_mf.mf_next) &&
1419 isspace(*(tail -1)))
1420 tail--;
1421 *tail = '\0';
1422 break;
1423 }
1424 }
1425
1426 /*
1427 * Carry out the specified control directive:
1428 */
1429 if (!STACK_IS_EMPTY(lms.lms_cdir_stack))
1430 level = &STACK_TOP(lms.lms_cdir_stack);
1431
1432 switch (mdptr->md_op) {
1433 case CDIR_T_UNKNOWN: /* Unrecognized control directive */
1434 if (!pass)
1435 break;
1436 mf_fatal0(&arg_mf, MSG_INTL(MSG_MAP_CDIR_BAD));
1437 return (FALSE);
1438
1439 case CDIR_T_ADD:
1440 if (pass && !cexp_ident_add(&arg_mf, arg_mf.mf_next))
1441 return (FALSE);
1442 break;
1443
1444 case CDIR_T_CLEAR:
1445 if (pass)
1446 cexp_ident_clear(&arg_mf, arg_mf.mf_next);
1447 break;
1448
1449 case CDIR_T_ERROR:
1450 if (!pass)
1451 break;
1452 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ERROR),
1453 arg_mf.mf_next);
1454 return (FALSE);
1455
1456 case CDIR_T_VERSION:
1457 /*
1458 * A $mapfile_version control directive can only appear
1459 * as the first directive in a mapfile, and is used to
1460 * determine the syntax for the rest of the file. It's
1461 * too late to be using it here.
1462 */
1463 if (!pass)
1464 break;
1465 mf_fatal0(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REPVER));
1466 return (FALSE);
1467
1468 case CDIR_T_IF:
1469 /* Push a new level on the conditional input stack */
1470 if (STACK_RESERVE(lms.lms_cdir_stack, CDIR_STACK_INIT) == 0)
1471 return (FALSE);
1472 level = &lms.lms_cdir_stack.stk_s[lms.lms_cdir_stack.stk_n++];
1473 level->cdl_if_lineno = arg_mf.mf_lineno;
1474 level->cdl_else_lineno = 0;
1475
1476 /*
1477 * If previous level is not passing, this level is disabled.
1478 * Otherwise, the expression value determines what happens.
1479 */
1480 if (pass) {
1481 level->cdl_done = level->cdl_pass = expr_eval;
1482 } else {
1483 level->cdl_done = 1;
1484 level->cdl_pass = 0;
1485 }
1486 break;
1487
1488 case CDIR_T_ELIF:
1489 /* $elif requires an open $if construct */
1490 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) {
1491 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF),
1492 MSG_ORIG(MSG_STR_CDIR_ELIF));
1493 return (FALSE);
1494 }
1495
1496 /* $elif cannot follow $else */
1497 if (level->cdl_else_lineno > 0) {
1498 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ELSE),
1499 MSG_ORIG(MSG_STR_CDIR_ELIF),
1500 EC_LINENO(level->cdl_else_lineno));
1501 return (FALSE);
1502 }
1503
1504 /*
1505 * Accept text from $elif if the level isn't already
1506 * done and the expression evaluates to true.
1507 */
1508 level->cdl_pass = !level->cdl_done && expr_eval;
1509 if (level->cdl_pass)
1510 level->cdl_done = 1;
1511 break;
1512
1513 case CDIR_T_ELSE:
1514 /* $else requires an open $if construct */
1515 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) {
1516 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF),
1517 MSG_ORIG(MSG_STR_CDIR_ELSE));
1518 return (FALSE);
1519 }
1520
1521 /* There can only be one $else in the chain */
1522 if (level->cdl_else_lineno > 0) {
1523 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ELSE),
1524 MSG_ORIG(MSG_STR_CDIR_ELSE),
1525 EC_LINENO(level->cdl_else_lineno));
1526 return (FALSE);
1527 }
1528 level->cdl_else_lineno = arg_mf.mf_lineno;
1529
1530 /* Accept text from $else if the level isn't already done */
1531 level->cdl_pass = !level->cdl_done;
1532 level->cdl_done = 1;
1533 break;
1534
1535 case CDIR_T_ENDIF:
1536 /* $endif requires an open $if construct */
1537 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) {
1538 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF),
1539 MSG_ORIG(MSG_STR_CDIR_ENDIF));
1540 return (FALSE);
1541 }
1542 if (--lms.lms_cdir_stack.stk_n > 0)
1543 level = &STACK_TOP(lms.lms_cdir_stack);
1544 break;
1545
1546 default:
1547 return (FALSE);
1548 }
1549
1550 /* Evaluating the control directive above can change pass status */
1551 expr_eval = STACK_IS_EMPTY(lms.lms_cdir_stack) ||
1552 STACK_TOP(lms.lms_cdir_stack).cdl_pass;
1553 if (expr_eval != pass) {
1554 pass = expr_eval;
1555 DBG_CALL(Dbg_map_pass(arg_mf.mf_ofl->ofl_lml, pass,
1556 arg_mf.mf_name, arg_mf.mf_lineno, mdptr->md_name));
1557 }
1558
1559 /*
1560 * At this point, we have processed a control directive,
1561 * updated our conditional state stack, and the input is
1562 * positioned at the start of the line following the directive.
1563 * If the current level is accepting input, then give control
1564 * back to ld_map_gettoken() to resume its normal operation.
1565 */
1566 if (pass)
1567 return (TRUE);
1568
1569 /*
1570 * The current level is not accepting input. Only another
1571 * control directive can change this, so read and discard input
1572 * until we encounter one of the following:
1573 *
1574 * EOF: Return and let ld_map_gettoken() report it
1575 * Control Directive: Restart this function / evaluate new directive
1576 */
1577 while (*mf->mf_next != '\0') {
1578 /* Skip leading whitespace */
1579 while (isspace_nonl(*mf->mf_next))
1580 mf->mf_next++;
1581
1582 /*
1583 * Control directives start with a '$'. If we hit
1584 * one, restart the function at this point
1585 */
1586 if (*mf->mf_next == '$')
1587 goto restart;
1588
1589 /* Not a control directive, so advance input to next line */
1590 advance_to_eol(&mf->mf_next);
1591 if (*mf->mf_next == '\n') {
1592 mf->mf_lineno++;
1593 mf->mf_next++;
1594 }
1595 }
1596
1597 assert(*mf->mf_next == '\0');
1598 return (TRUE);
1599
1600 /*
1601 * Control directives that require an argument that is not present
1602 * jump here to report the error and exit.
1603 */
1604 error_reqarg:
1605 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REQARG), mdptr->md_name);
1606 return (FALSE);
1607
1608 }
1609
1610 #ifndef _ELF64
/*
 * Convert a NULL terminated string to lowercase, in place.
 *
 * entry:
 *	str - String to convert. Every character for which tolower()
 *		supplies a lowercase equivalent is replaced by it, and
 *		all other characters are left unchanged.
 *
 * note:
 *	The <ctype.h> functions require an argument that is representable
 *	as unsigned char (or EOF). Plain char can be signed, in which case
 *	a byte with the high bit set is a negative value, so each character
 *	is converted to unsigned char before being passed to tolower().
 */
void
ld_map_lowercase(char *str)
{
	for (; *str != '\0'; str++)
		*str = (char)tolower((unsigned char)*str);
}
1620 #endif
1621
1622 /*
1623 * Wrappper on strtoul()/strtoull(), adapted to return an Xword.
1624 *
1625 * entry:
1626 * str - Pointer to string to be converted.
1627 * endptr - As documented for strtoul(3C). Either NULL, or
1628 * address of pointer to receive the address of the first
1629 * unused character in str (called "final" in strtoul(3C)).
1630 * ret_value - Address of Xword variable to receive result.
1631 *
1632 * exit:
1633 * On success, *ret_value receives the result, *endptr is updated if
1634 * endptr is non-NULL, and STRTOXWORD_OK is returned.
1635 * On failure, STRTOXWORD_TOBIG is returned if an otherwise valid
1636 * value was too large, and STRTOXWORD_BAD is returned if the string
1637 * is malformed.
1638 */
1639 ld_map_strtoxword_t
ld_map_strtoxword(const char * restrict str,char ** restrict endptr,Xword * ret_value)1640 ld_map_strtoxword(const char *restrict str, char **restrict endptr,
1641 Xword *ret_value)
1642 {
1643 #if defined(_ELF64) /* _ELF64 */
1644 #define FUNC strtoull /* Function to use */
1645 #define FUNC_MAX ULLONG_MAX /* Largest value returned by FUNC */
1646 #define XWORD_MAX ULLONG_MAX /* Largest Xword value */
1647 uint64_t value; /* Variable of FUNC return type */
1648 #else /* _ELF32 */
1649 #define FUNC strtoul
1650 #define FUNC_MAX ULONG_MAX
1651 #define XWORD_MAX UINT_MAX
1652 ulong_t value;
1653 #endif
1654
1655 char *endptr_local; /* Used if endptr is NULL */
1656
1657 if (endptr == NULL)
1658 endptr = &endptr_local;
1659
1660 errno = 0;
1661 value = FUNC(str, endptr, 0);
1662 if ((errno != 0) || (str == *endptr)) {
1663 if (value == FUNC_MAX)
1664 return (STRTOXWORD_TOOBIG);
1665 else
1666 return (STRTOXWORD_BAD);
1667 }
1668
1669 /*
1670 * If this is a 64-bit linker building an ELFCLASS32 object,
1671 * the FUNC return type is a 64-bit value, while an Xword is
1672 * 32-bit. It is possible for FUNC to be able to convert a value
1673 * too large for our return type.
1674 */
1675 #if FUNC_MAX != XWORD_MAX
1676 if (value > XWORD_MAX)
1677 return (STRTOXWORD_TOOBIG);
1678 #endif
1679
1680 *ret_value = value;
1681 return (STRTOXWORD_OK);
1682
1683 #undef FUNC
1684 #undef FUNC_MAX
1685 #undef XWORD_MAC
1686 }
1687
1688 /*
1689 * Convert the unsigned integer value at the current mapfile input
1690 * into binary form. All numeric values in mapfiles are treated as
1691 * unsigned integers of the appropriate width for an address on the
1692 * given target. Values can be decimal, hex, or octal.
1693 *
1694 * entry:
1695 * str - String to process.
1696 * value - Address of variable to receive resulting value.
1697 * notail - If TRUE, an error is issued if non-whitespace
1698 * characters other than '#' (comment) are found following
1699 * the numeric value before the end of line.
1700 *
1701 * exit:
1702 * On success:
1703 * - *str is advanced to the next character following the value
1704 * - *value receives the value
1705 * - Returns TRUE (1).
1706 * On failure, returns FALSE (0).
1707 */
1708 static Boolean
ld_map_getint(Mapfile * mf,ld_map_tkval_t * value,Boolean notail)1709 ld_map_getint(Mapfile *mf, ld_map_tkval_t *value, Boolean notail)
1710 {
1711 ld_map_strtoxword_t s2xw_ret;
1712 ld_map_npatch_t np;
1713 char *endptr;
1714 char *startptr = mf->mf_next;
1715 char *errstr = mf->mf_next;
1716
1717 value->tkv_int.tkvi_str = mf->mf_next;
1718 s2xw_ret = ld_map_strtoxword(mf->mf_next, &endptr,
1719 &value->tkv_int.tkvi_value);
1720 if (s2xw_ret != STRTOXWORD_OK) {
1721 null_patch_eol(mf->mf_next, &np);
1722 if (s2xw_ret == STRTOXWORD_TOOBIG)
1723 mf_fatal(mf, MSG_INTL(MSG_MAP_VALUELIMIT), errstr);
1724 else
1725 mf_fatal(mf, MSG_INTL(MSG_MAP_MALVALUE), errstr);
1726 null_patch_undo(&np);
1727 return (FALSE);
1728 }
1729
1730 /* Advance position to item following value, skipping whitespace */
1731 value->tkv_int.tkvi_cnt = endptr - startptr;
1732 mf->mf_next = endptr;
1733
1734 while (isspace_nonl(*mf->mf_next))
1735 mf->mf_next++;
1736
1737 /* If requested, ensure there's nothing left */
1738 if (notail && (*mf->mf_next != '\n') && (*mf->mf_next != '#') &&
1739 (*mf->mf_next != '\0')) {
1740 null_patch_eol(mf->mf_next, &np);
1741 mf_fatal(mf, MSG_INTL(MSG_MAP_BADVALUETAIL), errstr);
1742 null_patch_undo(&np);
1743 return (FALSE);
1744 }
1745
1746 return (TRUE);
1747 }
1748
1749 /*
1750 * Convert a an unquoted identifier into a TK_STRING token, using the
1751 * rules for syntax version in use. Used exclusively by ld_map_gettoken().
1752 *
1753 * entry:
1754 * mf - Mapfile descriptor, positioned to the first character of
1755 * the string.
1756 * flags - Bitmask of options to control ld_map_gettoken()s behavior
1757 * tkv- Address of pointer to variable to receive token value.
1758 *
1759 * exit:
1760 * On success, mf is advanced past the token, tkv is updated with
1761 * the string, and TK_STRING is returned. On error, TK_ERROR is returned.
1762 */
1763 inline static Token
gettoken_ident(Mapfile * mf,int flags,ld_map_tkval_t * tkv)1764 gettoken_ident(Mapfile *mf, int flags, ld_map_tkval_t *tkv)
1765 {
1766 char *end;
1767 Token tok;
1768 ld_map_npatch_t np;
1769
1770 tkv->tkv_str = mf->mf_next;
1771 if ((end = ident_delimit(mf)) == NULL)
1772 return (TK_ERROR);
1773 mf->mf_next = end;
1774
1775 /*
1776 * One advantage of reading the entire mapfile into memory is that
1777 * we can access the strings within it without having to allocate
1778 * more memory or make copies. In order to do that, we need to NULL
1779 * terminate this identifier. That is going to overwrite the
1780 * following character. The problem this presents is that the next
1781 * character may well be the first character of a subsequent token.
1782 * The solution to this is:
1783 *
1784 * 1) Disallow the case where the next character is able to
1785 * start a string. This is not legal mapfile syntax anyway,
1786 * so catching it here simplifies matters.
1787 * 2) Copy the character into the special mf->mf_next_ch
1788 * 3) The next call to ld_map_gettoken() checks mf->mf_next_ch,
1789 * and if it is non-0, uses it instead of dereferencing the
1790 * mf_next pointer.
1791 */
1792 tok = (*mf->mf_next & 0x80) ?
1793 TK_OP_ILLCHR : mf->mf_tokdisp[(unsigned)*mf->mf_next];
1794 switch (tok) {
1795 case TK_OP_BADCHR:
1796 null_patch_eol(mf->mf_next, &np);
1797 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), mf->mf_next);
1798 null_patch_undo(&np);
1799 return (TK_ERROR);
1800
1801 case TK_OP_SIMQUOTE:
1802 case TK_OP_CQUOTE:
1803 case TK_OP_CDIR:
1804 case TK_OP_NUM:
1805 case TK_OP_ID:
1806 null_patch_eol(mf->mf_next, &np);
1807 mf_fatal(mf, MSG_INTL(MSG_MAP_WSNEEDED), mf->mf_next);
1808 null_patch_undo(&np);
1809 return (TK_ERROR);
1810 }
1811
1812 /* Null terminate, saving the replaced character */
1813 mf->mf_next_ch = *mf->mf_next;
1814 *mf->mf_next = '\0';
1815
1816 if (flags & TK_F_STRLC)
1817 ld_map_lowercase(tkv->tkv_str);
1818 return (TK_STRING);
1819 }
1820
1821 /*
1822 * Convert a quoted string into a TK_STRING token, using simple
1823 * quoting rules:
1824 * - Start and end quotes must be present and match
1825 * - There are no special characters or escape sequences.
1826 * This function is used exclusively by ld_map_gettoken().
1827 *
1828 * entry:
1829 * mf - Mapfile descriptor, positioned to the opening quote character.
1830 * flags - Bitmask of options to control ld_map_gettoken()s behavior
1831 * tkv- Address of pointer to variable to receive token value.
1832 *
1833 * exit:
1834 * On success, mf is advanced past the token, tkv is updated with
1835 * the string, and TK_STRING is returned. On error, TK_ERROR is returned.
1836 */
1837 inline static Token
gettoken_simquote_str(Mapfile * mf,int flags,ld_map_tkval_t * tkv)1838 gettoken_simquote_str(Mapfile *mf, int flags, ld_map_tkval_t *tkv)
1839 {
1840 char *str, *end;
1841 char quote;
1842
1843 str = mf->mf_next++;
1844 quote = *str;
1845 end = mf->mf_next;
1846 while ((*end != '\0') && (*end != '\n') && (*end != quote))
1847 end++;
1848 if (*end != quote) {
1849 ld_map_npatch_t np;
1850
1851 null_patch_eol(end, &np);
1852 mf_fatal(mf, MSG_INTL(MSG_MAP_NOTERM), str);
1853 null_patch_undo(&np);
1854 return (TK_ERROR);
1855 }
1856
1857 /*
1858 * end is pointing at the closing quote. We can turn that into NULL
1859 * termination for the string without needing to restore it later.
1860 */
1861 *end = '\0';
1862 mf->mf_next = end + 1;
1863 tkv->tkv_str = str + 1; /* Skip opening quote */
1864 if (flags & TK_F_STRLC)
1865 ld_map_lowercase(tkv->tkv_str);
1866 return (TK_STRING);
1867 }
1868
1869 /*
1870 * Convert a quoted string into a TK_STRING token, using C string literal
1871 * quoting rules:
1872 * - Start and end quotes must be present and match
1873 * - Backslash is an escape, used to introduce special characters
1874 * This function is used exclusively by ld_map_gettoken().
1875 *
1876 * entry:
1877 * mf - Mapfile descriptor, positioned to the opening quote character.
1878 * flags - Bitmask of options to control ld_map_gettoken()s behavior
1879 * tkv- Address of pointer to variable to receive token value.
1880 *
1881 * exit:
1882 * On success, mf is advanced past the token, tkv is updated with
1883 * the string, and TK_STRING is returned. On error, TK_ERROR is returned.
1884 */
1885 inline static Token
gettoken_cquote_str(Mapfile * mf,int flags,ld_map_tkval_t * tkv)1886 gettoken_cquote_str(Mapfile *mf, int flags, ld_map_tkval_t *tkv)
1887 {
1888 char *str, *cur, *end;
1889 char quote;
1890 int c;
1891
1892 /*
1893 * This function goes through the quoted string and copies
1894 * it on top of itself, replacing escape sequences with the
1895 * characters they denote. There is always enough room for this,
1896 * because escapes are multi-character sequences that are converted
1897 * to single character results.
1898 */
1899 str = mf->mf_next++;
1900 quote = *str;
1901 cur = end = mf->mf_next;
1902 for (c = *end++; (c != '\0') && (c != '\n') && (c != quote);
1903 c = *end++) {
1904 if (c == '\\') {
1905 c = conv_translate_c_esc(&end);
1906 if (c == -1) {
1907 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCESC), *end);
1908 return (TK_ERROR);
1909 }
1910 }
1911 *cur++ = c;
1912 }
1913 *cur = '\0'; /* terminate the result */
1914 if (c != quote) {
1915 ld_map_npatch_t np;
1916
1917 null_patch_eol(end, &np);
1918 mf_fatal(mf, MSG_INTL(MSG_MAP_NOTERM), str);
1919 null_patch_undo(&np);
1920 return (TK_ERROR);
1921 }
1922
1923 /* end is pointing one character past the closing quote */
1924 mf->mf_next = end;
1925 tkv->tkv_str = str + 1; /* Skip opening quote */
1926 if (flags & TK_F_STRLC)
1927 ld_map_lowercase(tkv->tkv_str);
1928 return (TK_STRING);
1929 }
1930
1931 /*
1932 * Peek ahead at the text token.
1933 *
1934 * entry:
1935 * mf - Mapfile descriptor
1936 *
1937 * exit:
1938 * Returns one of the TK_* values, including the TK_OP values (that is,
1939 * tokens are not processed into their necessarily final form).
1940 */
1941 Token
ld_map_peektoken(Mapfile * mf)1942 ld_map_peektoken(Mapfile *mf)
1943 {
1944 int ch;
1945
1946 if (mf->mf_next_ch == 0)
1947 ch = *mf->mf_next;
1948 else
1949 ch = mf->mf_next_ch;
1950
1951 return ((ch & 0x80) ? TK_OP_ILLCHR : mf->mf_tokdisp[ch]);
1952 }
1953
1954 /*
1955 * Get a token from the mapfile.
1956 *
1957 * entry:
1958 * mf - Mapfile descriptor
1959 * flags - Bitmask of options to control ld_map_gettoken()s behavior
1960 * tkv- Address of pointer to variable to receive token value.
1961 *
1962 * exit:
1963 * Returns one of the TK_* values, to report the result. If the resulting
1964 * token has a value (TK_STRING / TK_INT), and tkv is non-NULL, tkv
1965 * is filled in with the resulting value.
1966 */
1967 Token
ld_map_gettoken(Mapfile * mf,int flags,ld_map_tkval_t * tkv)1968 ld_map_gettoken(Mapfile *mf, int flags, ld_map_tkval_t *tkv)
1969 {
1970 int cdir_allow, ch;
1971 Token tok;
1972 ld_map_npatch_t np;
1973
1974 /*
1975 * Mapfile control directives all start with a '$' character. However,
1976 * they are only valid when they are the first thing on a line. That
1977 * happens on the first call to ld_map_gettoken() for a new
1978 * mapfile, as tracked with lms.lms_cdir_valid, and immediately
1979 * following each newline seen in the file.
1980 */
1981 cdir_allow = lms.lms_cdir_valid;
1982 lms.lms_cdir_valid = 0;
1983
1984 /* Cycle through the characters looking for tokens. */
1985 for (;;) {
1986 /*
1987 * Process the next character. This is normally *mf->mf_next,
1988 * but if mf->mf_next_ch is non-0, then it contains the
1989 * character, and *mf->mf_next contains a NULL termination
1990 * from the TK_STRING token returned on the previous call.
1991 *
1992 * gettoken_ident() ensures that this is never done to
1993 * a character that starts a string.
1994 */
1995 if (mf->mf_next_ch == 0) {
1996 ch = *mf->mf_next;
1997 } else {
1998 ch = mf->mf_next_ch;
1999 mf->mf_next_ch = 0; /* Reset */
2000 }
2001
2002 /* Map the character to a dispatch action */
2003 tok = (ch & 0x80) ? TK_OP_ILLCHR : mf->mf_tokdisp[ch];
2004
2005 /*
2006 * Items that require processing are identified as OP tokens.
2007 * We process them, and return a result non-OP token.
2008 *
2009 * Non-OP tokens are single character tokens, and we return
2010 * them immediately.
2011 */
2012 switch (tok) {
2013 case TK_OP_EOF:
2014 /* If EOFOK is set, quietly report it as TK_EOF */
2015 if ((flags & TK_F_EOFOK) != 0)
2016 return (TK_EOF);
2017
2018 /* Treat it as a standard error */
2019 mf_fatal0(mf, MSG_INTL(MSG_MAP_PREMEOF));
2020 return (TK_ERROR);
2021
2022 case TK_OP_ILLCHR:
2023 mf_fatal(mf, MSG_INTL(MSG_MAP_ILLCHAR), ch);
2024 mf->mf_next++;
2025 return (TK_ERROR);
2026
2027 case TK_OP_BADCHR:
2028 tk_op_badchr:
2029 null_patch_eol(mf->mf_next, &np);
2030 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), mf->mf_next);
2031 null_patch_undo(&np);
2032 mf->mf_next++;
2033 return (TK_ERROR);
2034
2035 case TK_OP_WS: /* White space */
2036 mf->mf_next++;
2037 break;
2038
2039 case TK_OP_NL: /* White space too, but bump line number. */
2040 mf->mf_next++;
2041 mf->mf_lineno++;
2042 cdir_allow = 1;
2043 break;
2044
2045 case TK_OP_SIMQUOTE:
2046 if (flags & TK_F_KEYWORD)
2047 goto tk_op_badkwquote;
2048 return (gettoken_simquote_str(mf, flags, tkv));
2049
2050 case TK_OP_CQUOTE:
2051 if (flags & TK_F_KEYWORD) {
2052 tk_op_badkwquote:
2053 null_patch_eol(mf->mf_next, &np);
2054 mf_fatal(mf, MSG_INTL(MSG_MAP_BADKWQUOTE),
2055 mf->mf_next);
2056 null_patch_undo(&np);
2057 mf->mf_next++;
2058 return (TK_ERROR);
2059 }
2060 return (gettoken_cquote_str(mf, flags, tkv));
2061
2062 case TK_OP_CMT:
2063 advance_to_eol(&mf->mf_next);
2064 break;
2065
2066 case TK_OP_CDIR:
2067 /*
2068 * Control directives are only valid at the start
2069 * of a line.
2070 */
2071 if (!cdir_allow) {
2072 null_patch_eol(mf->mf_next, &np);
2073 mf_fatal(mf, MSG_INTL(MSG_MAP_CDIR_NOTBOL),
2074 mf->mf_next);
2075 null_patch_undo(&np);
2076 mf->mf_next++;
2077 return (TK_ERROR);
2078 }
2079 if (!cdir_process(mf))
2080 return (TK_ERROR);
2081 break;
2082
2083 case TK_OP_NUM: /* Decimal, hex(0x...), or octal (0...) value */
2084 if (!ld_map_getint(mf, tkv, FALSE))
2085 return (TK_ERROR);
2086 return (TK_INT);
2087
2088 case TK_OP_ID: /* Unquoted identifier */
2089 return (gettoken_ident(mf, flags, tkv));
2090
2091 case TK_OP_CEQUAL: /* += or -= */
2092 if (*(mf->mf_next + 1) != '=')
2093 goto tk_op_badchr;
2094 tok = (ch == '+') ? TK_PLUSEQ : TK_MINUSEQ;
2095 mf->mf_next += 2;
2096 return (tok);
2097
2098 default: /* Non-OP token */
2099 mf->mf_next++;
2100 return (tok);
2101 }
2102 }
2103 }
2104
2105 /*
2106 * Given a token and value returned by ld_map_gettoken(), return a string
2107 * representation of it suitable for use in an error message.
2108 *
2109 * entry:
2110 * tok - Token code. Must not be an OP-token
2111 * tkv - Token value
2112 */
2113 const char *
ld_map_tokenstr(Token tok,ld_map_tkval_t * tkv,Conv_inv_buf_t * inv_buf)2114 ld_map_tokenstr(Token tok, ld_map_tkval_t *tkv, Conv_inv_buf_t *inv_buf)
2115 {
2116 size_t cnt;
2117
2118 switch (tok) {
2119 case TK_ERROR:
2120 return (MSG_ORIG(MSG_STR_ERROR));
2121 case TK_EOF:
2122 return (MSG_ORIG(MSG_STR_EOF));
2123 case TK_STRING:
2124 return (tkv->tkv_str);
2125 case TK_COLON:
2126 return (MSG_ORIG(MSG_QSTR_COLON));
2127 case TK_SEMICOLON:
2128 return (MSG_ORIG(MSG_QSTR_SEMICOLON));
2129 case TK_EQUAL:
2130 return (MSG_ORIG(MSG_QSTR_EQUAL));
2131 case TK_PLUSEQ:
2132 return (MSG_ORIG(MSG_QSTR_PLUSEQ));
2133 case TK_MINUSEQ:
2134 return (MSG_ORIG(MSG_QSTR_MINUSEQ));
2135 case TK_ATSIGN:
2136 return (MSG_ORIG(MSG_QSTR_ATSIGN));
2137 case TK_DASH:
2138 return (MSG_ORIG(MSG_QSTR_DASH));
2139 case TK_LEFTBKT:
2140 return (MSG_ORIG(MSG_QSTR_LEFTBKT));
2141 case TK_RIGHTBKT:
2142 return (MSG_ORIG(MSG_QSTR_RIGHTBKT));
2143 case TK_LEFTSQR:
2144 return (MSG_ORIG(MSG_QSTR_LEFTSQR));
2145 case TK_RIGHTSQR:
2146 return (MSG_ORIG(MSG_QSTR_RIGHTSQR));
2147 case TK_PIPE:
2148 return (MSG_ORIG(MSG_QSTR_PIPE));
2149 case TK_INT:
2150 cnt = tkv->tkv_int.tkvi_cnt;
2151 if (cnt >= sizeof (inv_buf->buf))
2152 cnt = sizeof (inv_buf->buf) - 1;
2153 (void) memcpy(inv_buf->buf, tkv->tkv_int.tkvi_str, cnt);
2154 inv_buf->buf[cnt] = '\0';
2155 return (inv_buf->buf);
2156 case TK_STAR:
2157 return (MSG_ORIG(MSG_QSTR_STAR));
2158 case TK_BANG:
2159 return (MSG_ORIG(MSG_QSTR_BANG));
2160 default:
2161 assert(0);
2162 break;
2163 }
2164
2165 /*NOTREACHED*/
2166 return (MSG_INTL(MSG_MAP_INTERR));
2167 }
2168
2169 /*
2170 * Advance the input to the first non-empty line, and determine
2171 * the mapfile version. The version is specified by the mapfile
2172 * using a $mapfile_version directive. The original System V
2173 * syntax lacks this directive, and we use that fact to identify
2174 * such files. SysV mapfile are implicitly defined to have version 1.
2175 *
2176 * entry:
2177 * ofl - Output file descriptor
2178 * mf - Mapfile block
2179 *
2180 * exit:
2181 * On success, updates mf->mf_version, and returns TRUE (1).
2182 * On failure, returns FALSE (0).
2183 */
2184 static Boolean
mapfile_version(Mapfile * mf)2185 mapfile_version(Mapfile *mf)
2186 {
2187 char *line_start = mf->mf_next;
2188 Boolean cont = TRUE;
2189 Boolean status = TRUE; /* Assume success */
2190 Token tok;
2191
2192 mf->mf_version = MFV_SYSV;
2193
2194 /*
2195 * Cycle through the characters looking for tokens. Although the
2196 * true version is not known yet, we use the v2 dispatch table.
2197 * It contains control directives, which we need for this search,
2198 * and the other TK_OP_ tokens we will recognize and act on are the
2199 * same for both tables.
2200 *
2201 * It is important not to process any tokens that would lead to
2202 * a non-OP token:
2203 *
2204 * - The version is required to interpret them
2205 * - Our mapfile descriptor is not fully initialized,
2206 * attempts to run that code will crash the program.
2207 */
2208 while (cont) {
2209 /* Map the character to a dispatch action */
2210 tok = (*mf->mf_next & 0x80) ?
2211 TK_OP_ILLCHR : gettok_dispatch_v2[(unsigned)*mf->mf_next];
2212
2213 switch (tok) {
2214 case TK_OP_WS: /* White space */
2215 mf->mf_next++;
2216 break;
2217
2218 case TK_OP_NL: /* White space too, but bump line number. */
2219 mf->mf_next++;
2220 mf->mf_lineno++;
2221 break;
2222
2223 case TK_OP_CMT:
2224 advance_to_eol(&mf->mf_next);
2225 break;
2226
2227 case TK_OP_CDIR:
2228 /*
2229 * Control directives are only valid at the start
2230 * of a line. However, as we have not yet seen
2231 * a token, we do not need to test for this, and
2232 * can safely assume that we are at the start.
2233 */
2234 if (!strncasecmp(mf->mf_next,
2235 MSG_ORIG(MSG_STR_CDIR_MFVER),
2236 MSG_STR_CDIR_MFVER_SIZE) &&
2237 isspace_nonl(*(mf->mf_next +
2238 MSG_STR_CDIR_MFVER_SIZE))) {
2239 ld_map_tkval_t ver;
2240
2241 mf->mf_next += MSG_STR_CDIR_MFVER_SIZE + 1;
2242 if (!ld_map_getint(mf, &ver, TRUE)) {
2243 status = cont = FALSE;
2244 break;
2245 }
2246 /*
2247 * Is it a valid version? Note that we
2248 * intentionally do not allow you to
2249 * specify version 1 using the $mapfile_version
2250 * syntax, because that's reserved to version
2251 * 2 and up.
2252 */
2253 if ((ver.tkv_int.tkvi_value < 2) ||
2254 (ver.tkv_int.tkvi_value >= MFV_NUM)) {
2255 const char *fmt;
2256
2257 fmt = (ver.tkv_int.tkvi_value < 2) ?
2258 MSG_INTL(MSG_MAP_CDIR_BADVDIR) :
2259 MSG_INTL(MSG_MAP_CDIR_BADVER);
2260 mf_fatal(mf, fmt,
2261 EC_WORD(ver.tkv_int.tkvi_value));
2262 status = cont = FALSE;
2263 break;
2264 }
2265 mf->mf_version = ver.tkv_int.tkvi_value;
2266 cont = FALSE; /* Version recovered. All done */
2267 break;
2268 }
2269 /*
2270 * Not a version directive. Reset the current position
2271 * to the start of the current line and stop here.
2272 * SysV syntax applies.
2273 */
2274 mf->mf_next = line_start;
2275 cont = FALSE;
2276 break;
2277
2278 default:
2279 /*
2280 * If we see anything else, then stop at this point.
2281 * The file has System V syntax (version 1), and the
2282 * next token should be interpreted as such.
2283 */
2284 cont = FALSE;
2285 break;
2286 }
2287 }
2288
2289 return (status);
2290 }
2291
2292 /*
2293 * Parse the mapfile.
2294 */
2295 Boolean
ld_map_parse(const char * mapfile,Ofl_desc * ofl)2296 ld_map_parse(const char *mapfile, Ofl_desc *ofl)
2297 {
2298 struct stat stat_buf; /* stat of mapfile */
2299 int mapfile_fd; /* descriptor for mapfile */
2300 int err;
2301 Mapfile *mf; /* Mapfile descriptor */
2302 size_t name_len; /* strlen(mapfile) */
2303
2304 /*
2305 * Determine if we're dealing with a file or a directory.
2306 */
2307 if (stat(mapfile, &stat_buf) == -1) {
2308 err = errno;
2309 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_STAT), mapfile,
2310 strerror(err));
2311 return (FALSE);
2312 }
2313 if (S_ISDIR(stat_buf.st_mode)) {
2314 DIR *dirp;
2315 struct dirent *denp;
2316
2317 /*
2318 * Open the directory and interpret each visible file as a
2319 * mapfile.
2320 */
2321 if ((dirp = opendir(mapfile)) == NULL)
2322 return (TRUE);
2323
2324 while ((denp = readdir(dirp)) != NULL) {
2325 char path[PATH_MAX];
2326
2327 /*
2328 * Ignore any hidden filenames. Construct the full
2329 * pathname to the new mapfile.
2330 */
2331 if (*denp->d_name == '.')
2332 continue;
2333 (void) snprintf(path, PATH_MAX, MSG_ORIG(MSG_STR_PATH),
2334 mapfile, denp->d_name);
2335 if (!ld_map_parse(path, ofl))
2336 return (FALSE);
2337 }
2338 (void) closedir(dirp);
2339 return (TRUE);
2340 } else if (!S_ISREG(stat_buf.st_mode)) {
2341 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_NOTREG), mapfile);
2342 return (FALSE);
2343 }
2344
2345 /* Open file */
2346 if ((mapfile_fd = open(mapfile, O_RDONLY)) == -1) {
2347 err = errno;
2348 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN), mapfile,
2349 strerror(err));
2350 return (FALSE);
2351 }
2352
2353 /*
2354 * Allocate enough memory to hold the state block, mapfile name,
2355 * and mapfile text. Text has alignment 1, so it can follow the
2356 * state block without padding.
2357 */
2358 name_len = strlen(mapfile) + 1;
2359 mf = libld_malloc(sizeof (*mf) + name_len + stat_buf.st_size + 1);
2360 if (mf == NULL)
2361 return (FALSE);
2362 mf->mf_ofl = ofl;
2363 mf->mf_name = (char *)(mf + 1);
2364 (void) strcpy(mf->mf_name, mapfile);
2365 mf->mf_text = mf->mf_name + name_len;
2366 if (read(mapfile_fd, mf->mf_text, stat_buf.st_size) !=
2367 stat_buf.st_size) {
2368 err = errno;
2369 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_READ), mapfile,
2370 strerror(err));
2371 (void) close(mapfile_fd);
2372 return (FALSE);
2373 }
2374 (void) close(mapfile_fd);
2375 mf->mf_text[stat_buf.st_size] = '\0';
2376 mf->mf_next = mf->mf_text;
2377 mf->mf_lineno = 1;
2378 mf->mf_next_ch = 0; /* No "lookahead" character yet */
2379 mf->mf_ec_insndx = 0; /* Insert entrace criteria at top */
2380
2381 /*
2382 * Read just enough from the mapfile to determine the version,
2383 * and then dispatch to the appropriate code for further processing
2384 */
2385 if (!mapfile_version(mf))
2386 return (FALSE);
2387
2388 /*
2389 * Start and continuation masks for unquoted identifier at this
2390 * mapfile version level.
2391 */
2392 mf->mf_tkid_start = TKID_ATTR_START(mf->mf_version);
2393 mf->mf_tkid_cont = TKID_ATTR_CONT(mf->mf_version);
2394
2395 DBG_CALL(Dbg_map_parse(ofl->ofl_lml, mapfile, mf->mf_version));
2396
2397 switch (mf->mf_version) {
2398 case MFV_SYSV:
2399 /* Guidance: Use newer mapfile syntax */
2400 if (OFL_GUIDANCE(ofl, FLG_OFG_NO_MF))
2401 ld_eprintf(ofl, ERR_GUIDANCE,
2402 MSG_INTL(MSG_GUIDE_MAPFILE), mapfile);
2403
2404 mf->mf_tokdisp = gettok_dispatch_v1;
2405 if (!ld_map_parse_v1(mf))
2406 return (FALSE);
2407 break;
2408
2409 case MFV_SOLARIS:
2410 mf->mf_tokdisp = gettok_dispatch_v2;
2411 STACK_RESET(lms.lms_cdir_stack);
2412
2413 /*
2414 * If the conditional expression identifier tree has not been
2415 * initialized, set it up. This is only done on the first
2416 * mapfile, because the identifier control directives accumulate
2417 * across all the mapfiles.
2418 */
2419 if ((lms.lms_cexp_id == NULL) && !cexp_ident_init())
2420 return (FALSE);
2421
2422 /*
2423 * Tell ld_map_gettoken() we will accept a '$' as starting a
2424 * control directive on the first call. Normally, they are
2425 * only allowed after a newline.
2426 */
2427 lms.lms_cdir_valid = 1;
2428
2429 if (!ld_map_parse_v2(mf))
2430 return (FALSE);
2431
2432 /* Did we leave any open $if control directives? */
2433 if (!STACK_IS_EMPTY(lms.lms_cdir_stack)) {
2434 while (!STACK_IS_EMPTY(lms.lms_cdir_stack)) {
2435 cdir_level_t *level =
2436 &STACK_POP(lms.lms_cdir_stack);
2437
2438 mf_fatal(mf, MSG_INTL(MSG_MAP_CDIR_NOEND),
2439 EC_LINENO(level->cdl_if_lineno));
2440 }
2441 return (FALSE);
2442 }
2443 break;
2444 }
2445
2446 return (TRUE);
2447 }
2448
2449 /*
2450 * Sort the segment list. This is necessary if a mapfile has set explicit
2451 * virtual addresses for segments, or defined a SEGMENT_ORDER directive.
2452 *
2453 * Only PT_LOAD segments can be assigned a virtual address. These segments can
2454 * be one of two types:
2455 *
2456 * - Standard segments for text, data or bss. These segments will have been
2457 * inserted before the default text (first PT_LOAD) segment.
2458 *
2459 * - Empty (reservation) segments. These segment will have been inserted at
2460 * the end of any default PT_LOAD segments.
2461 *
2462 * Any standard segments that are assigned a virtual address will be sorted,
2463 * and as their definitions precede any default PT_LOAD segments, these segments
2464 * will be assigned sections before any defaults.
2465 *
2466 * Any reservation segments are also sorted amoung themselves, as these segments
2467 * must still follow the standard default segments.
2468 */
2469 static Boolean
sort_seg_list(Ofl_desc * ofl)2470 sort_seg_list(Ofl_desc *ofl)
2471 {
2472 APlist *sort_segs = NULL, *load_segs = NULL;
2473 Sg_desc *sgp1;
2474 Aliste idx1;
2475 Aliste nsegs;
2476
2477
2478 /*
2479 * We know the number of elements in the sorted list will be
2480 * the same as the original, so use this as the initial allocation
2481 * size for the replacement aplist.
2482 */
2483 nsegs = aplist_nitems(ofl->ofl_segs);
2484
2485
2486 /* Add the items below SGID_TEXT to the list */
2487 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) {
2488 if (sgp1->sg_id >= SGID_TEXT)
2489 break;
2490
2491 if (aplist_append(&sort_segs, sgp1, nsegs) == NULL)
2492 return (FALSE);
2493 }
2494
2495 /*
2496 * If there are any SEGMENT_ORDER items, add them, and set their
2497 * FLG_SG_ORDERED flag to identify them in debug output, and to
2498 * prevent them from being added again below.
2499 */
2500 for (APLIST_TRAVERSE(ofl->ofl_segs_order, idx1, sgp1)) {
2501 if (aplist_append(&sort_segs, sgp1, nsegs) == NULL)
2502 return (FALSE);
2503 sgp1->sg_flags |= FLG_SG_ORDERED;
2504 }
2505
2506 /*
2507 * Add the loadable segments to another list in sorted order.
2508 */
2509 DBG_CALL(Dbg_map_sort_title(ofl->ofl_lml, TRUE));
2510 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) {
2511 DBG_CALL(Dbg_map_sort_seg(ofl->ofl_lml, ELFOSABI_SOLARIS,
2512 ld_targ.t_m.m_mach, sgp1));
2513
2514 /* Only interested in PT_LOAD items not in SEGMENT_ORDER list */
2515 if ((sgp1->sg_phdr.p_type != PT_LOAD) ||
2516 (sgp1->sg_flags & FLG_SG_ORDERED))
2517 continue;
2518
2519 /*
2520 * If the loadable segment does not contain a vaddr, simply
2521 * append it to the new list.
2522 */
2523 if ((sgp1->sg_flags & FLG_SG_P_VADDR) == 0) {
2524 if (aplist_append(&load_segs, sgp1, AL_CNT_SEGMENTS) ==
2525 NULL)
2526 return (FALSE);
2527
2528 } else {
2529 Aliste idx2;
2530 Sg_desc *sgp2;
2531 int inserted = 0;
2532
2533 /*
2534 * Traverse the segment list we are creating, looking
2535 * for a segment that defines a vaddr.
2536 */
2537 for (APLIST_TRAVERSE(load_segs, idx2, sgp2)) {
2538 /*
2539 * Any real segments that contain vaddr's need
2540 * to be sorted. Any reservation segments also
2541 * need to be sorted. However, any reservation
2542 * segments should be placed after any real
2543 * segments.
2544 */
2545 if (((sgp2->sg_flags &
2546 (FLG_SG_P_VADDR | FLG_SG_EMPTY)) == 0) &&
2547 (sgp1->sg_flags & FLG_SG_EMPTY))
2548 continue;
2549
2550 if ((sgp2->sg_flags & FLG_SG_P_VADDR) &&
2551 ((sgp2->sg_flags & FLG_SG_EMPTY) ==
2552 (sgp1->sg_flags & FLG_SG_EMPTY))) {
2553 if (sgp1->sg_phdr.p_vaddr ==
2554 sgp2->sg_phdr.p_vaddr) {
2555 ld_eprintf(ofl, ERR_FATAL,
2556 MSG_INTL(MSG_MAP_SEGSAME),
2557 sgp1->sg_name,
2558 sgp2->sg_name);
2559 return (FALSE);
2560 }
2561
2562 if (sgp1->sg_phdr.p_vaddr >
2563 sgp2->sg_phdr.p_vaddr)
2564 continue;
2565 }
2566
2567 /*
2568 * Insert this segment before the segment on
2569 * the load_segs list.
2570 */
2571 if (aplist_insert(&load_segs, sgp1,
2572 AL_CNT_SEGMENTS, idx2) == NULL)
2573 return (FALSE);
2574 inserted = 1;
2575 break;
2576 }
2577
2578 /*
2579 * If the segment being inspected has not been inserted
2580 * in the segment list, simply append it to the list.
2581 */
2582 if ((inserted == 0) && (aplist_append(&load_segs,
2583 sgp1, AL_CNT_SEGMENTS) == NULL))
2584 return (FALSE);
2585 }
2586 }
2587
2588 /*
2589 * Add the sorted loadable segments to our initial segment list.
2590 */
2591 for (APLIST_TRAVERSE(load_segs, idx1, sgp1)) {
2592 if (aplist_append(&sort_segs, sgp1, AL_CNT_SEGMENTS) == NULL)
2593 return (FALSE);
2594 }
2595
2596 /*
2597 * Add all other segments to our list.
2598 */
2599 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) {
2600 if ((sgp1->sg_id < SGID_TEXT) ||
2601 (sgp1->sg_phdr.p_type == PT_LOAD) ||
2602 (sgp1->sg_flags & FLG_SG_ORDERED))
2603 continue;
2604
2605 if (aplist_append(&sort_segs, sgp1, AL_CNT_SEGMENTS) == NULL)
2606 return (FALSE);
2607 }
2608
2609 /*
2610 * Free the original list, and the pt_load list, and use
2611 * the new list as the segment list.
2612 */
2613 free(ofl->ofl_segs);
2614 if (load_segs) free(load_segs);
2615 ofl->ofl_segs = sort_segs;
2616
2617 if (DBG_ENABLED) {
2618 Dbg_map_sort_title(ofl->ofl_lml, FALSE);
2619 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) {
2620 Dbg_map_sort_seg(ofl->ofl_lml, ELFOSABI_SOLARIS,
2621 ld_targ.t_m.m_mach, sgp1);
2622 }
2623 }
2624
2625 return (TRUE);
2626 }
2627
2628 /*
2629 * After all mapfiles have been processed, this routine is used to
2630 * finish any remaining mapfile related work.
2631 *
2632 * exit:
2633 * Returns TRUE on success, and FALSE on failure.
2634 */
2635 Boolean
ld_map_post_process(Ofl_desc * ofl)2636 ld_map_post_process(Ofl_desc *ofl)
2637 {
2638 Aliste idx, idx2;
2639 Is_desc *isp;
2640 Sg_desc *sgp;
2641 Ent_desc *enp;
2642 Sg_desc *first_seg = NULL;
2643
2644
2645 DBG_CALL(Dbg_map_post_title(ofl->ofl_lml));
2646
2647 /*
2648 * Per-segment processing:
2649 * - Identify segments with explicit virtual address
2650 * - Details of input and output section order
2651 */
2652 for (APLIST_TRAVERSE(ofl->ofl_segs, idx, sgp)) {
2653 /*
2654 * We are looking for segments. Program headers that represent
2655 * segments are required to have a non-NULL name pointer,
2656 * while that those that do not are required to have a
2657 * NULL name pointer.
2658 */
2659 if (sgp->sg_name == NULL)
2660 continue;
2661
2662 /* Remember the first non-disabled segment */
2663 if ((first_seg == NULL) && !(sgp->sg_flags & FLG_SG_DISABLED))
2664 first_seg = sgp;
2665
2666 /*
2667 * If a segment has an explicit virtual address, we will
2668 * need to sort the segments.
2669 */
2670 if (sgp->sg_flags & FLG_SG_P_VADDR)
2671 ofl->ofl_flags1 |= FLG_OF1_VADDR;
2672
2673 /*
2674 * The FLG_OF_OS_ORDER flag enables the code that does
2675 * output section ordering. Set if the segment has
2676 * a non-empty output section order list.
2677 */
2678 if (alist_nitems(sgp->sg_os_order) > 0)
2679 ofl->ofl_flags |= FLG_OF_OS_ORDER;
2680
2681 /*
2682 * The version 1 and version 2 syntaxes for input section
2683 * ordering are different and incompatible enough that we
2684 * only allow the use of one or the other for a given segment:
2685 *
2686 * v1) The version 1 syntax has the user set the ?O flag on
2687 * the segment. If this is done, all input sections placed
2688 * via an entrance criteria that has a section name are to
2689 * be sorted, using the order of the entrance criteria
2690 * as the sort key.
2691 *
2692 * v2) The version 2 syntax has the user specify a name for
2693 * the entry criteria, and then provide a list of entry
2694 * criteria names via the IS_ORDER segment attribute.
2695 * Sections placed via the criteria listed in IS_ORDER
2696 * are sorted, and the others are not.
2697 *
2698 * Regardless of the syntax version used, the section sorting
2699 * code expects the following:
2700 *
2701 * - Segments requiring input section sorting have the
2702 * FLG_SG_IS_ORDER flag set
2703 *
2704 * - Entrance criteria referencing the segment that
2705 * participate in input section sorting have a non-zero
2706 * sort key in their ec_ordndx field.
2707 *
2708 * At this point, the following are true:
2709 *
2710 * - All entrance criteria have ec_ordndx set to 0.
2711 * - Segments that require the version 1 behavior have
2712 * the FLG_SG_IS_ORDER flag set, and the segments
2713 * sg_is_order list is empty.
2714 * - Segments that require the version 2 behavior do not
2715 * have FLG_SG_IS_ORDER set, and the sg_is_order list is
2716 * non-empty. This list contains the names of the entrance
2717 * criteria that will participate in input section sorting,
2718 * and their relative order in the list provides the
2719 * sort key to use.
2720 *
2721 * We must detect these two cases, set the FLG_SG_IS_ORDER
2722 * flag as necessary, and fill in all entrance criteria
2723 * sort keys. If any input section sorting is to be done,
2724 * we also set the FLG_OF_IS_ORDER flag on the output descriptor
2725 * to enable the code that does that work.
2726 */
2727
2728 /* Version 1: ?O flag? */
2729 if (sgp->sg_flags & FLG_SG_IS_ORDER) {
2730 Word index = 0;
2731
2732 ofl->ofl_flags |= FLG_OF_IS_ORDER;
2733 DBG_CALL(Dbg_map_ent_ord_title(ofl->ofl_lml,
2734 sgp->sg_name));
2735
2736 /*
2737 * Give each user defined entrance criteria for this
2738 * segment that specifies a section name a
2739 * monotonically increasing sort key.
2740 */
2741 for (APLIST_TRAVERSE(ofl->ofl_ents, idx2, enp))
2742 if ((enp->ec_segment == sgp) &&
2743 (enp->ec_is_name != NULL) &&
2744 ((enp->ec_flags & FLG_EC_BUILTIN) == 0))
2745 enp->ec_ordndx = ++index;
2746 continue;
2747 }
2748
2749 /* Version 2: SEGMENT IS_ORDER list? */
2750 if (aplist_nitems(sgp->sg_is_order) > 0) {
2751 Word index = 0;
2752
2753 ofl->ofl_flags |= FLG_OF_IS_ORDER;
2754 DBG_CALL(Dbg_map_ent_ord_title(ofl->ofl_lml,
2755 sgp->sg_name));
2756
2757 /*
2758 * Give each entrance criteria in the sg_is_order
2759 * list a monotonically increasing sort key.
2760 */
2761 for (APLIST_TRAVERSE(sgp->sg_is_order, idx2, enp)) {
2762 enp->ec_ordndx = ++index;
2763 enp->ec_segment->sg_flags |= FLG_SG_IS_ORDER;
2764 }
2765 }
2766 }
2767
2768 /* Sort the segment descriptors if necessary */
2769 if (((ofl->ofl_flags1 & FLG_OF1_VADDR) ||
2770 (aplist_nitems(ofl->ofl_segs_order) > 0)) &&
2771 !sort_seg_list(ofl))
2772 return (FALSE);
2773
2774 /*
2775 * If the output file is a static file without an interpreter, and
2776 * if any virtual address is specified, then set the NOHDR flag for
2777 * backward compatibility.
2778 */
2779 if (!(ofl->ofl_flags & (FLG_OF_DYNAMIC | FLG_OF_RELOBJ)) &&
2780 !(ofl->ofl_osinterp) && (ofl->ofl_flags1 & FLG_OF1_VADDR))
2781 ofl->ofl_dtflags_1 |= DF_1_NOHDR;
2782
2783 if (ofl->ofl_flags & FLG_OF_RELOBJ) {
2784 /*
2785 * NOHDR has no effect on a relocatable file.
2786 * Make sure this flag isn't set.
2787 */
2788 ofl->ofl_dtflags_1 &= ~DF_1_NOHDR;
2789 } else if (first_seg != NULL) {
2790 /*
2791 * DF_1_NOHDR might have been set globally by the HDR_NOALLOC
2792 * directive. If not, then we want to check the per-segment
2793 * flag for the first loadable segment and propagate it
2794 * if set.
2795 */
2796 if ((ofl->ofl_dtflags_1 & DF_1_NOHDR) == 0) {
2797 /*
2798 * If we sorted the segments, the first segment
2799 * may have changed.
2800 */
2801 if ((ofl->ofl_flags1 & FLG_OF1_VADDR) ||
2802 (aplist_nitems(ofl->ofl_segs_order) > 0)) {
2803 for (APLIST_TRAVERSE(ofl->ofl_segs, idx, sgp)) {
2804 if (sgp->sg_name == NULL)
2805 continue;
2806 if ((sgp->sg_flags & FLG_SG_DISABLED) ==
2807 0) {
2808 first_seg = sgp;
2809 break;
2810 }
2811 }
2812 }
2813
2814 /*
2815 * If the per-segment NOHDR flag is set on our first
2816 * segment, then make it take effect.
2817 */
2818 if (first_seg->sg_flags & FLG_SG_NOHDR)
2819 ofl->ofl_dtflags_1 |= DF_1_NOHDR;
2820 }
2821
2822 /*
2823 * For executable and shared objects, the first segment must
2824 * be loadable unless NOHDR was specified, because the ELF
2825 * header must simultaneously lie at offset 0 of the file and
2826 * be included in the first loadable segment. This isn't
2827 * possible if some other segment type starts the file
2828 */
2829 if (!(ofl->ofl_dtflags_1 & DF_1_NOHDR) &&
2830 (first_seg->sg_phdr.p_type != PT_LOAD)) {
2831 Conv_inv_buf_t inv_buf;
2832
2833 ld_eprintf(ofl, ERR_FATAL,
2834 MSG_INTL(MSG_SEG_FIRNOTLOAD),
2835 conv_phdr_type(ELFOSABI_SOLARIS, ld_targ.t_m.m_mach,
2836 first_seg->sg_phdr.p_type, 0, &inv_buf),
2837 first_seg->sg_name);
2838 return (FALSE);
2839 }
2840 }
2841
2842 /*
2843 * Mapfiles may have been used to create symbol definitions
2844 * with backing storage. Although the backing storage is
2845 * associated with an input section, the association of the
2846 * section to an output section (and segment) is initially
2847 * deferred. Now that all mapfile processing is complete, any
2848 * entrance criteria requirements have been processed, and
2849 * these backing storage sections can be associated with the
2850 * appropriate output section (and segment).
2851 */
2852 if (ofl->ofl_maptext || ofl->ofl_mapdata)
2853 DBG_CALL(Dbg_sec_backing(ofl->ofl_lml));
2854
2855 for (APLIST_TRAVERSE(ofl->ofl_maptext, idx, isp)) {
2856 if (ld_place_section(ofl, isp, NULL,
2857 ld_targ.t_id.id_text, NULL) == (Os_desc *)S_ERROR)
2858 return (FALSE);
2859 }
2860
2861 for (APLIST_TRAVERSE(ofl->ofl_mapdata, idx, isp)) {
2862 if (ld_place_section(ofl, isp, NULL,
2863 ld_targ.t_id.id_data, NULL) == (Os_desc *)S_ERROR)
2864 return (FALSE);
2865 }
2866
2867 return (TRUE);
2868 }
2869