1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <errno.h>
28 #include <stdarg.h>
29 #include "ndrgen.h"
30 #include "y.tab.h"
31
/*
 * C-like lexical analysis.
 *
 * 1. Define a "struct node"
 * 2. Define a "struct symbol" that encapsulates a struct node.
 * 3. Define a "struct integer" that encapsulates a struct node.
 * 4. Set the YACC stack type in the grammar:
 *		%{
 *		#define YYSTYPE struct node *
 *		%}
 * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
 *    Use "_KW" as a suffix for keyword tokens, e.g. "struct" is
 *    "%token STRUCT_KW":
 *	// atomic values
 *	%token INTEGER STRING IDENTIFIER
 *	// keywords
 *	%token STRUCT_KW CASE_KW
 *	// operators
 *	%token PLUS MINUS ASSIGN ARROW
 *	// overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
 *	%token INCOP RELOP EQUOP ASSOP
 * 6. It's easiest to use the yacc(1) generated token numbers for node
 *    labels.  For node labels that are not actually part of the grammar,
 *    use a %token with an L_ prefix:
 *	// node labels (can't be generated by lex)
 *	%token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
 * 7. Call set_lex_input() before parsing.
 */
60
/*
 * Character classification helpers.
 *
 * All of these are function-like macros that may evaluate their
 * argument more than once, so never pass an expression with side
 * effects.  Every use of an argument is parenthesized so that an
 * arbitrary expression (e.g. a conditional) can be passed safely.
 */
#define	SQ	'\''
#define	DQ	'"'

#define	isquote(c)	((c) == SQ || (c) == DQ)

/* White space inside a preprocessor line (used by str_to_sv()). */
#define	iswhite(c)	\
	((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')

#define	is_between(c, l, u)	((l) <= (c) && (c) <= (u))

/*
 * White space between tokens.  Newline is deliberately absent:
 * yylex() handles it separately so that it can count lines.
 */
#define	is_white(c)	\
	((c) == ' ' || (c) == '\r' || (c) == '\t' || (c) == '\f')

#define	is_lower(c)	is_between((c), 'a', 'z')
#define	is_upper(c)	is_between((c), 'A', 'Z')
#define	is_alpha(c)	(is_lower(c) || is_upper(c))
#define	is_digit(c)	is_between((c), '0', '9')

/* First character / following characters of a symbol. */
#define	is_sstart(c)	(is_alpha(c) || (c) == '_')
#define	is_sfollow(c)	(is_sstart(c) || is_digit(c))

#define	is_xdigit(c)	\
	(is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
77
78 ndr_symbol_t *symbol_list;
79 static ndr_integer_t *integer_list;
80 static FILE *lex_infp;
81 static ndr_symbol_t *file_name;
82 int line_number;
83 int n_compile_error;
84
85 static int lex_at_bol;
86
87 /* In yacc(1) generated parser */
88 extern struct node *yylval;
89
90 /*
91 * The keywtab[] and optable[] could be external to this lex
92 * and it would all still work.
93 */
94 static ndr_keyword_t keywtable[] = {
95 { "struct", STRUCT_KW, 0 },
96 { "union", UNION_KW, 0 },
97 { "typedef", TYPEDEF_KW, 0 },
98
99 { "interface", INTERFACE_KW, 0 },
100 { "uuid", UUID_KW, 0 },
101 { "_no_reorder", _NO_REORDER_KW, 0 },
102 { "extern", EXTERN_KW, 0 },
103 { "reference", REFERENCE_KW, 0 },
104
105 { "align", ALIGN_KW, 0 },
106 { "operation", OPERATION_KW, 0 },
107 { "in", IN_KW, 0 },
108 { "out", OUT_KW, 0 },
109
110 { "string", STRING_KW, 0 },
111 { "size_is", SIZE_IS_KW, 0 },
112 { "length_is", LENGTH_IS_KW, 0 },
113
114 { "switch_is", SWITCH_IS_KW, 0 },
115 { "case", CASE_KW, 0 },
116 { "default", DEFAULT_KW, 0 },
117
118 { "transmit_as", TRANSMIT_AS_KW, 0 },
119 { "arg_is", ARG_IS_KW, 0 },
120
121 { "char", BASIC_TYPE, 1 },
122 { "uchar", BASIC_TYPE, 1 },
123 { "wchar", BASIC_TYPE, 2 },
124 { "short", BASIC_TYPE, 2 },
125 { "ushort", BASIC_TYPE, 2 },
126 { "long", BASIC_TYPE, 4 },
127 { "ulong", BASIC_TYPE, 4 },
128 {0}
129 };
130
131 static ndr_keyword_t optable[] = {
132 { "{", LC, 0 },
133 { "}", RC, 0 },
134 { "(", LP, 0 },
135 { ")", RP, 0 },
136 { "[", LB, 0 },
137 { "]", RB, 0 },
138 { "*", STAR, 0 },
139 { "/", DIV, 0 },
140 { "%", MOD, 0 },
141 { "-", MINUS, 0 },
142 { "+", PLUS, 0 },
143 { "&", AND, 0 },
144 { "|", OR, 0 },
145 { "^", XOR, 0 },
146 { ";", SEMI, 0 },
147 {0}
148 };
149
150 static int getch(FILE *fp);
151 static ndr_integer_t *int_enter(long);
152 static ndr_symbol_t *sym_enter(char *);
153 static ndr_symbol_t *sym_find(char *);
154 static int str_to_sv(char *, char *sv[]);
155
156 /*
157 * Enter the symbols for keyword.
158 */
159 static void
keyw_tab_init(ndr_keyword_t kwtable[])160 keyw_tab_init(ndr_keyword_t kwtable[])
161 {
162 int i;
163 ndr_keyword_t *kw;
164 ndr_symbol_t *sym;
165
166 for (i = 0; kwtable[i].name; i++) {
167 kw = &kwtable[i];
168
169 sym = sym_enter(kw->name);
170 sym->kw = kw;
171 }
172 }
173
174 void
set_lex_input(FILE * fp,char * name)175 set_lex_input(FILE *fp, char *name)
176 {
177 keyw_tab_init(keywtable);
178 keyw_tab_init(optable);
179
180 lex_infp = fp;
181 file_name = sym_enter(name);
182 line_number = 1;
183 lex_at_bol = 1;
184 }
185
/*
 * Fetch the next input character from fp, or EOF.  Single choke point
 * for all character input done by the lexer.
 */
static int
getch(FILE *fp)
{
	int	ch;

	ch = getc(fp);
	return (ch);
}
191
192 int
yylex(void)193 yylex(void)
194 {
195 char lexeme[512];
196 char *p = lexeme;
197 FILE *fp = lex_infp;
198 int c, xc;
199 ndr_symbol_t *sym;
200 ndr_integer_t *intg;
201
202 top:
203 p = lexeme;
204
205 c = getch(fp);
206 if (c == EOF)
207 return (EOF);
208
209 if (c == '\n') {
210 line_number++;
211 lex_at_bol = 1;
212 goto top;
213 }
214
215 /*
216 * Handle preprocessor lines. This just notes
217 * which file we're processing.
218 */
219 if (c == '#' && lex_at_bol) {
220 char *sv[10];
221 int sc;
222
223 while ((c = getch(fp)) != EOF && c != '\n')
224 *p++ = c;
225
226 *p = 0;
227 /* note: no ungetc() of newline, we don't want to count it */
228
229 if (*lexeme != ' ') {
230 /* not a line we know */
231 goto top;
232 }
233
234 sc = str_to_sv(lexeme, sv);
235 if (sc < 2)
236 goto top;
237
238 file_name = sym_enter(sv[1]);
239 line_number = atoi(sv[0]); /* for next input line */
240 lex_at_bol = 1;
241 goto top;
242 }
243
244 lex_at_bol = 0;
245
246 /*
247 * Skip white space
248 */
249 if (is_white(c))
250 goto top;
251
252 /*
253 * Symbol? Might be a keyword or just an identifier
254 */
255 if (is_sstart(c)) {
256 /* we got a symbol */
257 do {
258 *p++ = c;
259 c = getch(fp);
260 } while (is_sfollow(c));
261 (void) ungetc(c, fp);
262 *p = 0;
263
264 sym = sym_enter(lexeme);
265
266 yylval = &sym->s_node;
267
268 if (sym->kw) {
269 return (sym->kw->token);
270 } else {
271 return (IDENTIFIER);
272 }
273 }
274
275 /*
276 * Integer constant?
277 */
278 if (is_digit(c)) {
279 /* we got a number */
280 *p++ = c;
281 if (c == '0') {
282 c = getch(fp);
283 if (c == 'x' || c == 'X') {
284 /* handle hex specially */
285 do {
286 *p++ = c;
287 c = getch(fp);
288 } while (is_xdigit(c));
289 goto convert_icon;
290 } else if (c == 'b' || c == 'B' ||
291 c == 'd' || c == 'D' ||
292 c == 'o' || c == 'O') {
293 do {
294 *p++ = c;
295 c = getch(fp);
296 } while (is_digit(c));
297 goto convert_icon;
298 }
299 (void) ungetc(c, fp);
300 }
301 /* could be anything */
302 c = getch(fp);
303 while (is_digit(c)) {
304 *p++ = c;
305 c = getch(fp);
306 }
307
308 convert_icon:
309 *p = 0;
310 (void) ungetc(c, fp);
311
312 intg = int_enter(strtol(lexeme, 0, 0));
313 yylval = &intg->s_node;
314
315 return (INTEGER);
316 }
317
318 /* Could handle strings. We don't seem to need them yet */
319
320 yylval = 0; /* operator tokens have no value */
321 xc = getch(fp); /* get look-ahead for two-char lexemes */
322
323 lexeme[0] = c;
324 lexeme[1] = xc;
325 lexeme[2] = 0;
326
327 /*
328 * Look for to-end-of-line comment
329 */
330 if (c == '/' && xc == '/') {
331 /* eat the comment */
332 while ((c = getch(fp)) != EOF && c != '\n')
333 ;
334 (void) ungetc(c, fp); /* put back newline */
335 goto top;
336 }
337
338 /*
339 * Look for multi-line comment
340 */
341 if (c == '/' && xc == '*') {
342 /* eat the comment */
343 xc = -1;
344 while ((c = getch(fp)) != EOF) {
345 if (xc == '*' && c == '/') {
346 /* that's it */
347 break;
348 }
349 xc = c;
350 if (c == '\n')
351 line_number++;
352 }
353 goto top;
354 }
355
356 /*
357 * Use symbol table lookup for two-character and
358 * one character operator tokens.
359 */
360 sym = sym_find(lexeme);
361 if (sym) {
362 /* there better be a keyword attached */
363 yylval = &sym->s_node;
364 return (sym->kw->token);
365 }
366
367 /* Try a one-character form */
368 (void) ungetc(xc, fp);
369 lexeme[1] = 0;
370 sym = sym_find(lexeme);
371 if (sym) {
372 /* there better be a keyword attached */
373 yylval = &sym->s_node;
374 return (sym->kw->token);
375 }
376
377 if (is_between(c, ' ', '~'))
378 compile_error("unrecognized character: 0x%02x (%c)", c, c);
379 else
380 compile_error("unrecognized character: 0x%02x", c);
381 goto top;
382 }
383
384 static ndr_symbol_t *
sym_find(char * name)385 sym_find(char *name)
386 {
387 ndr_symbol_t **pp;
388 ndr_symbol_t *p;
389
390 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
391 if (strcmp(p->name, name) == 0)
392 return (p);
393 }
394
395 return (0);
396 }
397
398 static ndr_symbol_t *
sym_enter(char * name)399 sym_enter(char *name)
400 {
401 ndr_symbol_t **pp;
402 ndr_symbol_t *p;
403
404 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
405 if (strcmp(p->name, name) == 0)
406 return (p);
407 }
408
409 p = ndr_alloc(1, sizeof (ndr_symbol_t));
410
411 if ((p->name = strdup(name)) == NULL)
412 fatal_error("%s", strerror(ENOMEM));
413
414 p->s_node.label = IDENTIFIER;
415 p->s_node.n_sym = p;
416
417 *pp = p;
418
419 return (p);
420 }
421
422 static ndr_integer_t *
int_enter(long value)423 int_enter(long value)
424 {
425 ndr_integer_t **pp;
426 ndr_integer_t *p;
427
428 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) {
429 if (p->value == value)
430 return (p);
431 }
432
433 p = ndr_alloc(1, sizeof (ndr_integer_t));
434
435 p->value = value;
436 p->s_node.label = INTEGER;
437 p->s_node.n_int = value;
438
439 *pp = p;
440
441 return (p);
442 }
443
/*
 * Allocate zero-filled memory for nelem elements of elsize bytes each.
 * Allocation failure is reported through fatal_error(), so callers
 * do not need to check the result.
 */
void *
ndr_alloc(size_t nelem, size_t elsize)
{
	void	*mem = calloc(nelem, elsize);

	if (mem != NULL)
		return (mem);

	fatal_error("%s", strerror(ENOMEM));
	/* NOTREACHED */
	return (NULL);
}
456
457 /*
458 * The input context (filename, line number) is maintained by the
459 * lexical analysis, and we generally want such info reported for
460 * errors in a consistent manner.
461 */
462 void
compile_error(const char * fmt,...)463 compile_error(const char *fmt, ...)
464 {
465 char buf[NDLBUFSZ];
466 va_list ap;
467
468 va_start(ap, fmt);
469 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
470 va_end(ap);
471
472 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n",
473 file_name->name, line_number, buf);
474
475 n_compile_error++;
476 }
477
478 void
fatal_error(const char * fmt,...)479 fatal_error(const char *fmt, ...)
480 {
481 char buf[NDLBUFSZ];
482 va_list ap;
483
484 va_start(ap, fmt);
485 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
486 va_end(ap);
487
488 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf);
489 exit(1);
490 }
491
492 /*
493 * Setup nodes for the lexical analyzer.
494 */
495 struct node *
n_cons(int label,...)496 n_cons(int label, ...)
497 {
498 ndr_node_t *np;
499 va_list ap;
500
501 np = ndr_alloc(1, sizeof (ndr_node_t));
502
503 va_start(ap, label);
504 np->label = label;
505 np->n_arg[0] = va_arg(ap, void *);
506 np->n_arg[1] = va_arg(ap, void *);
507 np->n_arg[2] = va_arg(ap, void *);
508 va_end(ap);
509
510 np->line_number = line_number;
511 np->file_name = file_name;
512
513 return (np);
514 }
515
516 /*
517 * list: item
518 * | list item ={ n_splice($1, $2); }
519 * ;
520 */
521 void
n_splice(struct node * np1,struct node * np2)522 n_splice(struct node *np1, struct node *np2)
523 {
524 while (np1->n_next)
525 np1 = np1->n_next;
526
527 np1->n_next = np2;
528 }
529
/*
 * Split buf, in place, into white-space separated words.
 *
 * Pointers to the words are stored in sv[], followed by a null
 * pointer; the number of words is returned.  Single and double quotes
 * group characters (white space included) into the current word and
 * are themselves removed; a quote left open runs to the end of buf.
 *
 * The words are packed into buf itself, so buf is modified.  sv[] has
 * no size parameter: the caller must provide room for every word plus
 * the terminating null pointer.
 */
static int
str_to_sv(char *buf, char *sv[])
{
	char	**slot = sv;	/* next unused entry of sv[] */
	char	*rd = buf;	/* read position */
	char	*wr = buf;	/* write position, always behind rd */
	int	open_word = 0;	/* nonzero while inside a word */
	int	ch;

	while ((ch = *rd++) != 0) {
		int	blank;

		blank = (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f');

		if (!open_word) {
			if (blank)
				continue;

			/* first character of a new word */
			*slot++ = wr;
			open_word = 1;
		}

		if (ch == '\'' || ch == '"') {
			int	closer = ch;

			/* copy up to the matching quote, dropping both */
			for (;;) {
				ch = *rd++;
				if (ch == 0 || ch == closer)
					break;
				*wr++ = ch;
			}
			if (ch == 0)
				break;
		} else if (blank) {
			/* end of word */
			*wr++ = 0;
			open_word = 0;
		} else {
			/* still inside word */
			*wr++ = ch;
		}
	}

	if (open_word)
		*wr++ = 0;

	*slot = (char *)0;
	return (slot - sv);
}
579