1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2020 Tintri by DDN, Inc. All rights reserved.
29 */
30
31 #include <errno.h>
32 #include <stdarg.h>
33 #include "ndrgen.h"
34 #include "y.tab.h"
35
36 /*
37 * C-like lexical analysis.
38 *
39 * 1. Define a "struct node"
40 * 2. Define a "struct symbol" that encapsulates a struct node.
41 * 3. Define a "struct integer" that encapsulates a struct node.
42 * 4. Set the YACC stack type in the grammar:
43 * %{
44 * #define YYSTYPE struct node *
45 * %}
 * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
47 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
48 * "%token STRUCT_KW":
49 * // atomic values
50 * %token INTEGER STRING IDENTIFIER
51 * // keywords
52 * %token STRUCT_KW CASE_KW
53 * // operators
54 * %token PLUS MINUS ASSIGN ARROW
55 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
56 * %token INCOP RELOP EQUOP ASSOP
57 * 6. It's easiest to use the yacc(1) generated token numbers for node
 * labels. For node labels that are not actually part of the grammar,
59 * use a %token with an L_ prefix:
60 * // node labels (can't be generated by lex)
61 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
62 * 7. Call set_lex_input() before parsing.
63 */
64
/*
 * Character classification helpers.
 *
 * Every macro fully parenthesizes its argument so that an arbitrary
 * expression may be passed.  Note that most of them evaluate the
 * argument more than once, so the argument must not have side effects.
 */
#define	SQ	'\''
#define	DQ	'"'

/* Quote characters and word separators (newline counts as a separator). */
#define	isquote(c)	((c) == SQ || (c) == DQ)
#define	iswhite(c)	\
	((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')

#define	is_between(c, l, u)	((l) <= (c) && (c) <= (u))
/*
 * In-line white space.  Unlike iswhite(), this accepts '\r' and does
 * NOT accept '\n'.
 */
#define	is_white(c)	\
	((c) == ' ' || (c) == '\r' || (c) == '\t' || (c) == '\f')
#define	is_lower(c)	is_between((c), 'a', 'z')
#define	is_upper(c)	is_between((c), 'A', 'Z')
#define	is_alpha(c)	(is_lower(c) || is_upper(c))
#define	is_digit(c)	is_between((c), '0', '9')
/* First character of a symbol, and any following character of a symbol. */
#define	is_sstart(c)	(is_alpha(c) || (c) == '_')
#define	is_sfollow(c)	(is_sstart(c) || is_digit(c))
#define	is_xdigit(c)	\
	(is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
81
/* Head of the list of all symbols entered so far; sym_enter() appends. */
ndr_symbol_t		*symbol_list;
/* Head of the list of distinct integer constants; int_enter() appends. */
static ndr_integer_t	*integer_list;
/* Input stream read by yylex(); installed by set_lex_input(). */
static FILE		*lex_infp;
/* Symbol whose name is the file currently being scanned. */
static ndr_symbol_t	*file_name;
/* Current input line number; recorded in nodes and in diagnostics. */
int			line_number;
/* Count of errors reported through compile_error(). */
int			n_compile_error;

/* Non-zero while the scanner is positioned at the start of a line. */
static int		lex_at_bol;

/* In yacc(1) generated parser */
extern struct node	*yylval;
93
/*
 * The keywtable[] and optable[] could be external to this lex
 * and it would all still work.
 *
 * Each entry is { lexeme, token returned by yylex(), value }.  The
 * table is terminated by an all-zero entry (null name).  The third
 * field is 0 for plain keywords; for BASIC_TYPE entries it is 1, 2
 * or 4 -- presumably the size of the type in bytes (TODO: confirm
 * against the ndr_keyword_t definition in ndrgen.h).
 */
static ndr_keyword_t keywtable[] = {
	{ "struct",	STRUCT_KW,	0 },
	{ "union",	UNION_KW,	0 },
	{ "typedef",	TYPEDEF_KW,	0 },

	{ "interface",	INTERFACE_KW,	0 },
	{ "uuid",	UUID_KW,	0 },
	{ "_no_reorder", _NO_REORDER_KW, 0 },
	{ "extern",	EXTERN_KW,	0 },
	{ "reference",	REFERENCE_KW,	0 },

	{ "align",	ALIGN_KW,	0 },
	{ "operation",	OPERATION_KW,	0 },
	{ "in",		IN_KW,		0 },
	{ "out",	OUT_KW,		0 },

	{ "string",	STRING_KW,	0 },
	{ "size_is",	SIZE_IS_KW,	0 },
	{ "length_is",	LENGTH_IS_KW,	0 },

	{ "switch_is",	SWITCH_IS_KW,	0 },
	{ "case",	CASE_KW,	0 },
	{ "default",	DEFAULT_KW,	0 },

	{ "transmit_as", TRANSMIT_AS_KW, 0 },
	{ "arg_is",	ARG_IS_KW,	0 },
	{ "fake",	FAKE_KW,	0 },

	/* basic types all share one token; the third field tells them apart */
	{ "char",	BASIC_TYPE,	1 },
	{ "uchar",	BASIC_TYPE,	1 },
	{ "wchar",	BASIC_TYPE,	2 },
	{ "short",	BASIC_TYPE,	2 },
	{ "ushort",	BASIC_TYPE,	2 },
	{ "long",	BASIC_TYPE,	4 },
	{ "ulong",	BASIC_TYPE,	4 },
	{0}
};
135
/*
 * Operator and punctuation lexemes.  These are entered into the same
 * symbol table as the keywords, and yylex() finds them by looking up
 * the one- or two-character lexeme.  Every entry here is a single
 * character.  The table is terminated by an all-zero entry.
 */
static ndr_keyword_t optable[] = {
	{ "{",		LC,		0 },
	{ "}",		RC,		0 },
	{ "(",		LP,		0 },
	{ ")",		RP,		0 },
	{ "[",		LB,		0 },
	{ "]",		RB,		0 },
	{ "*",		STAR,		0 },
	{ "/",		DIV,		0 },
	{ "%",		MOD,		0 },
	{ "-",		MINUS,		0 },
	{ "+",		PLUS,		0 },
	{ "&",		AND,		0 },
	{ "|",		OR,		0 },
	{ "^",		XOR,		0 },
	{ ";",		SEMI,		0 },
	{0}
};
154
/* Forward declarations for the file-local helpers defined below. */
static int getch(FILE *fp);
static ndr_integer_t *int_enter(long);
static ndr_symbol_t *sym_enter(char *);
static ndr_symbol_t *sym_find(char *);
static int str_to_sv(char *, char *sv[]);
160
161 /*
162 * Enter the symbols for keyword.
163 */
164 static void
keyw_tab_init(ndr_keyword_t kwtable[])165 keyw_tab_init(ndr_keyword_t kwtable[])
166 {
167 int i;
168 ndr_keyword_t *kw;
169 ndr_symbol_t *sym;
170
171 for (i = 0; kwtable[i].name; i++) {
172 kw = &kwtable[i];
173
174 sym = sym_enter(kw->name);
175 sym->kw = kw;
176 }
177 }
178
179 void
set_lex_input(FILE * fp,char * name)180 set_lex_input(FILE *fp, char *name)
181 {
182 keyw_tab_init(keywtable);
183 keyw_tab_init(optable);
184
185 lex_infp = fp;
186 file_name = sym_enter(name);
187 line_number = 1;
188 lex_at_bol = 1;
189 }
190
/*
 * Fetch the next character from fp, or EOF.  All scanner input goes
 * through this single function.
 */
static int
getch(FILE *fp)
{
	int	ch = getc(fp);

	return (ch);
}
196
197 int
yylex(void)198 yylex(void)
199 {
200 char lexeme[512];
201 char *p = lexeme;
202 FILE *fp = lex_infp;
203 int c, xc;
204 ndr_symbol_t *sym;
205 ndr_integer_t *intg;
206
207 top:
208 p = lexeme;
209
210 c = getch(fp);
211 if (c == EOF)
212 return (EOF);
213
214 if (c == '\n') {
215 line_number++;
216 lex_at_bol = 1;
217 goto top;
218 }
219
220 /*
221 * Handle preprocessor lines. This just notes
222 * which file we're processing.
223 */
224 if (c == '#' && lex_at_bol) {
225 char *sv[10];
226 int sc;
227
228 while ((c = getch(fp)) != EOF && c != '\n')
229 *p++ = c;
230
231 *p = 0;
232 /* note: no ungetc() of newline, we don't want to count it */
233
234 if (*lexeme != ' ') {
235 /* not a line we know */
236 goto top;
237 }
238
239 sc = str_to_sv(lexeme, sv);
240 if (sc < 2)
241 goto top;
242
243 file_name = sym_enter(sv[1]);
244 line_number = atoi(sv[0]); /* for next input line */
245 lex_at_bol = 1;
246 goto top;
247 }
248
249 lex_at_bol = 0;
250
251 /*
252 * Skip white space
253 */
254 if (is_white(c))
255 goto top;
256
257 /*
258 * Symbol? Might be a keyword or just an identifier
259 */
260 if (is_sstart(c)) {
261 /* we got a symbol */
262 do {
263 *p++ = c;
264 c = getch(fp);
265 } while (is_sfollow(c));
266 (void) ungetc(c, fp);
267 *p = 0;
268
269 sym = sym_enter(lexeme);
270
271 yylval = &sym->s_node;
272
273 if (sym->kw) {
274 return (sym->kw->token);
275 } else {
276 return (IDENTIFIER);
277 }
278 }
279
280 /*
281 * Integer constant?
282 */
283 if (is_digit(c)) {
284 /* we got a number */
285 *p++ = c;
286 if (c == '0') {
287 c = getch(fp);
288 if (c == 'x' || c == 'X') {
289 /* handle hex specially */
290 do {
291 *p++ = c;
292 c = getch(fp);
293 } while (is_xdigit(c));
294 goto convert_icon;
295 } else if (c == 'b' || c == 'B' ||
296 c == 'd' || c == 'D' ||
297 c == 'o' || c == 'O') {
298 do {
299 *p++ = c;
300 c = getch(fp);
301 } while (is_digit(c));
302 goto convert_icon;
303 }
304 (void) ungetc(c, fp);
305 }
306 /* could be anything */
307 c = getch(fp);
308 while (is_digit(c)) {
309 *p++ = c;
310 c = getch(fp);
311 }
312
313 convert_icon:
314 *p = 0;
315 (void) ungetc(c, fp);
316
317 intg = int_enter(strtol(lexeme, 0, 0));
318 yylval = &intg->s_node;
319
320 return (INTEGER);
321 }
322
323 /* Could handle strings. We don't seem to need them yet */
324
325 yylval = 0; /* operator tokens have no value */
326 xc = getch(fp); /* get look-ahead for two-char lexemes */
327
328 lexeme[0] = c;
329 lexeme[1] = xc;
330 lexeme[2] = 0;
331
332 /*
333 * Look for to-end-of-line comment
334 */
335 if (c == '/' && xc == '/') {
336 /* eat the comment */
337 while ((c = getch(fp)) != EOF && c != '\n')
338 ;
339 (void) ungetc(c, fp); /* put back newline */
340 goto top;
341 }
342
343 /*
344 * Look for multi-line comment
345 */
346 if (c == '/' && xc == '*') {
347 /* eat the comment */
348 xc = -1;
349 while ((c = getch(fp)) != EOF) {
350 if (xc == '*' && c == '/') {
351 /* that's it */
352 break;
353 }
354 xc = c;
355 if (c == '\n')
356 line_number++;
357 }
358 goto top;
359 }
360
361 /*
362 * Use symbol table lookup for two-character and
363 * one character operator tokens.
364 */
365 sym = sym_find(lexeme);
366 if (sym) {
367 /* there better be a keyword attached */
368 yylval = &sym->s_node;
369 return (sym->kw->token);
370 }
371
372 /* Try a one-character form */
373 (void) ungetc(xc, fp);
374 lexeme[1] = 0;
375 sym = sym_find(lexeme);
376 if (sym) {
377 /* there better be a keyword attached */
378 yylval = &sym->s_node;
379 return (sym->kw->token);
380 }
381
382 if (is_between(c, ' ', '~'))
383 compile_error("unrecognized character: 0x%02x (%c)", c, c);
384 else
385 compile_error("unrecognized character: 0x%02x", c);
386 goto top;
387 }
388
389 static ndr_symbol_t *
sym_find(char * name)390 sym_find(char *name)
391 {
392 ndr_symbol_t **pp;
393 ndr_symbol_t *p;
394
395 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
396 if (strcmp(p->name, name) == 0)
397 return (p);
398 }
399
400 return (0);
401 }
402
403 static ndr_symbol_t *
sym_enter(char * name)404 sym_enter(char *name)
405 {
406 ndr_symbol_t **pp;
407 ndr_symbol_t *p;
408
409 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
410 if (strcmp(p->name, name) == 0)
411 return (p);
412 }
413
414 p = ndr_alloc(1, sizeof (ndr_symbol_t));
415
416 if ((p->name = strdup(name)) == NULL)
417 fatal_error("%s", strerror(ENOMEM));
418
419 p->s_node.label = IDENTIFIER;
420 p->s_node.n_sym = p;
421
422 *pp = p;
423
424 return (p);
425 }
426
427 static ndr_integer_t *
int_enter(long value)428 int_enter(long value)
429 {
430 ndr_integer_t **pp;
431 ndr_integer_t *p;
432
433 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) {
434 if (p->value == value)
435 return (p);
436 }
437
438 p = ndr_alloc(1, sizeof (ndr_integer_t));
439
440 p->value = value;
441 p->s_node.label = INTEGER;
442 p->s_node.n_int = value;
443
444 *pp = p;
445
446 return (p);
447 }
448
/*
 * Allocate zero-filled memory for nelem elements of elsize bytes.
 * Allocation failure is reported through fatal_error().
 */
void *
ndr_alloc(size_t nelem, size_t elsize)
{
	void	*mem = calloc(nelem, elsize);

	if (mem != NULL)
		return (mem);

	fatal_error("%s", strerror(ENOMEM));
	/* NOTREACHED */
	return (mem);
}
461
462 /*
463 * The input context (filename, line number) is maintained by the
464 * lexical analysis, and we generally want such info reported for
465 * errors in a consistent manner.
466 */
467 void
compile_error(const char * fmt,...)468 compile_error(const char *fmt, ...)
469 {
470 char buf[NDLBUFSZ];
471 va_list ap;
472
473 va_start(ap, fmt);
474 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
475 va_end(ap);
476
477 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n",
478 file_name->name, line_number, buf);
479
480 n_compile_error++;
481 }
482
483 void
fatal_error(const char * fmt,...)484 fatal_error(const char *fmt, ...)
485 {
486 char buf[NDLBUFSZ];
487 va_list ap;
488
489 va_start(ap, fmt);
490 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
491 va_end(ap);
492
493 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf);
494 exit(1);
495 }
496
497 /*
498 * Setup nodes for the lexical analyzer.
499 */
500 struct node *
n_cons(int label,...)501 n_cons(int label, ...)
502 {
503 ndr_node_t *np;
504 va_list ap;
505
506 np = ndr_alloc(1, sizeof (ndr_node_t));
507
508 va_start(ap, label);
509 np->label = label;
510 np->n_arg[0] = va_arg(ap, void *);
511 np->n_arg[1] = va_arg(ap, void *);
512 np->n_arg[2] = va_arg(ap, void *);
513 va_end(ap);
514
515 np->line_number = line_number;
516 np->file_name = file_name;
517
518 return (np);
519 }
520
521 /*
522 * list: item
523 * | list item ={ n_splice($1, $2); }
524 * ;
525 */
526 void
n_splice(struct node * np1,struct node * np2)527 n_splice(struct node *np1, struct node *np2)
528 {
529 while (np1->n_next)
530 np1 = np1->n_next;
531
532 np1->n_next = np2;
533 }
534
/*
 * Split a string of white-space separated words into a vector of
 * strings, in place: buf is overwritten with the NUL-terminated
 * words and sv[] receives a pointer to each, followed by a null
 * pointer.  Text inside single or double quotes is copied without
 * the quotes and may contain white space.  An unterminated quote
 * ends the scan.
 *
 * Returns the number of words.  The caller must supply an sv[] large
 * enough for every word plus the terminating null pointer.
 */
static int
str_to_sv(char *buf, char *sv[])
{
	char	*src = buf;	/* read cursor */
	char	*dst = buf;	/* write cursor; never passes src */
	int	nwords = 0;
	int	inside = 0;	/* currently within a word? */
	int	ch;

	while ((ch = *src++) != 0) {
		int	blank = (ch == ' ' || ch == '\t' ||
		    ch == '\n' || ch == '\f');

		if (!inside) {
			if (blank)
				continue;

			/* first character of a new word */
			sv[nwords++] = dst;
			inside = 1;
		}

		if (ch == '\'' || ch == '"') {
			int	closer = ch;

			/* copy up to the matching quote, dropping both */
			for (;;) {
				ch = *src++;
				if (ch == 0 || ch == closer)
					break;
				*dst++ = ch;
			}
			if (ch == 0)
				break;
		} else if (blank) {
			/* end of word */
			*dst++ = 0;
			inside = 0;
		} else {
			/* still inside word */
			*dst++ = ch;
		}
	}

	if (inside)
		*dst = 0;

	sv[nwords] = (char *)0;
	return (nwords);
}
584