1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2020 Tintri by DDN, Inc. All rights reserved. 29 */ 30 31 #include <errno.h> 32 #include <stdarg.h> 33 #include "ndrgen.h" 34 #include "y.tab.h" 35 36 /* 37 * C-like lexical analysis. 38 * 39 * 1. Define a "struct node" 40 * 2. Define a "struct symbol" that encapsulates a struct node. 41 * 3. Define a "struct integer" that encapsulates a struct node. 42 * 4. Set the YACC stack type in the grammar: 43 * %{ 44 * #define YYSTYPE struct node * 45 * %} 46 * 5. Define %token's in the grammer for IDENTIFIER, STRING and INTEGER. 47 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is 48 * "%token STRUCT_KW": 49 * // atomic values 50 * %token INTEGER STRING IDENTIFIER 51 * // keywords 52 * %token STRUCT_KW CASE_KW 53 * // operators 54 * %token PLUS MINUS ASSIGN ARROW 55 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...) 56 * %token INCOP RELOP EQUOP ASSOP 57 * 6. It's easiest to use the yacc(1) generated token numbers for node 58 * labels. For node labels that are not actually part of the grammer, 59 * use a %token with an L_ prefix: 60 * // node labels (can't be generated by lex) 61 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ 62 * 7. Call set_lex_input() before parsing. 63 */ 64 65 #define SQ '\'' 66 #define DQ '"' 67 68 #define isquote(c) ((c) == SQ || (c) == DQ) 69 #define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f') 70 71 #define is_between(c, l, u) ((l) <= (c) && (c) <= (u)) 72 #define is_white(c) ((c) == ' ' || c == '\r' || c == '\t' || c == '\f') 73 #define is_lower(c) is_between((c), 'a', 'z') 74 #define is_upper(c) is_between((c), 'A', 'Z') 75 #define is_alpha(c) (is_lower(c) || is_upper(c)) 76 #define is_digit(c) is_between((c), '0', '9') 77 #define is_sstart(c) (is_alpha(c) || (c) == '_') 78 #define is_sfollow(c) (is_sstart(c) || is_digit(c)) 79 #define is_xdigit(c) \ 80 (is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f')) 81 82 ndr_symbol_t *symbol_list; 83 static ndr_integer_t *integer_list; 84 static FILE *lex_infp; 85 static ndr_symbol_t *file_name; 86 int line_number; 87 int n_compile_error; 88 89 static int lex_at_bol; 90 91 /* In yacc(1) generated parser */ 92 extern struct node *yylval; 93 94 /* 95 * The keywtab[] and optable[] could be external to this lex 96 * and it would all still work. 97 */ 98 static ndr_keyword_t keywtable[] = { 99 { "struct", STRUCT_KW, 0 }, 100 { "union", UNION_KW, 0 }, 101 { "typedef", TYPEDEF_KW, 0 }, 102 103 { "interface", INTERFACE_KW, 0 }, 104 { "uuid", UUID_KW, 0 }, 105 { "_no_reorder", _NO_REORDER_KW, 0 }, 106 { "extern", EXTERN_KW, 0 }, 107 { "reference", REFERENCE_KW, 0 }, 108 109 { "align", ALIGN_KW, 0 }, 110 { "operation", OPERATION_KW, 0 }, 111 { "in", IN_KW, 0 }, 112 { "out", OUT_KW, 0 }, 113 114 { "string", STRING_KW, 0 }, 115 { "size_is", SIZE_IS_KW, 0 }, 116 { "length_is", LENGTH_IS_KW, 0 }, 117 118 { "switch_is", SWITCH_IS_KW, 0 }, 119 { "case", CASE_KW, 0 }, 120 { "default", DEFAULT_KW, 0 }, 121 122 { "transmit_as", TRANSMIT_AS_KW, 0 }, 123 { "arg_is", ARG_IS_KW, 0 }, 124 { "fake", FAKE_KW, 0 }, 125 126 { "char", BASIC_TYPE, 1 }, 127 { "uchar", BASIC_TYPE, 1 }, 128 { "wchar", BASIC_TYPE, 2 }, 129 { "short", BASIC_TYPE, 2 }, 130 { "ushort", BASIC_TYPE, 2 }, 131 { "long", BASIC_TYPE, 4 }, 132 { "ulong", BASIC_TYPE, 4 }, 133 {0} 134 }; 135 136 static ndr_keyword_t optable[] = { 137 { "{", LC, 0 }, 138 { "}", RC, 0 }, 139 { "(", LP, 0 }, 140 { ")", RP, 0 }, 141 { "[", LB, 0 }, 142 { "]", RB, 0 }, 143 { "*", STAR, 0 }, 144 { "/", DIV, 0 }, 145 { "%", MOD, 0 }, 146 { "-", MINUS, 0 }, 147 { "+", PLUS, 0 }, 148 { "&", AND, 0 }, 149 { "|", OR, 0 }, 150 { "^", XOR, 0 }, 151 { ";", SEMI, 0 }, 152 {0} 153 }; 154 155 static int getch(FILE *fp); 156 static ndr_integer_t *int_enter(long); 157 static ndr_symbol_t *sym_enter(char *); 158 static ndr_symbol_t *sym_find(char *); 159 static int str_to_sv(char *, char *sv[]); 160 161 /* 162 * Enter the symbols for keyword. 163 */ 164 static void 165 keyw_tab_init(ndr_keyword_t kwtable[]) 166 { 167 int i; 168 ndr_keyword_t *kw; 169 ndr_symbol_t *sym; 170 171 for (i = 0; kwtable[i].name; i++) { 172 kw = &kwtable[i]; 173 174 sym = sym_enter(kw->name); 175 sym->kw = kw; 176 } 177 } 178 179 void 180 set_lex_input(FILE *fp, char *name) 181 { 182 keyw_tab_init(keywtable); 183 keyw_tab_init(optable); 184 185 lex_infp = fp; 186 file_name = sym_enter(name); 187 line_number = 1; 188 lex_at_bol = 1; 189 } 190 191 static int 192 getch(FILE *fp) 193 { 194 return (getc(fp)); 195 } 196 197 int 198 yylex(void) 199 { 200 char lexeme[512]; 201 char *p = lexeme; 202 FILE *fp = lex_infp; 203 int c, xc; 204 ndr_symbol_t *sym; 205 ndr_integer_t *intg; 206 207 top: 208 p = lexeme; 209 210 c = getch(fp); 211 if (c == EOF) 212 return (EOF); 213 214 if (c == '\n') { 215 line_number++; 216 lex_at_bol = 1; 217 goto top; 218 } 219 220 /* 221 * Handle preprocessor lines. This just notes 222 * which file we're processing. 223 */ 224 if (c == '#' && lex_at_bol) { 225 char *sv[10]; 226 int sc; 227 228 while ((c = getch(fp)) != EOF && c != '\n') 229 *p++ = c; 230 231 *p = 0; 232 /* note: no ungetc() of newline, we don't want to count it */ 233 234 if (*lexeme != ' ') { 235 /* not a line we know */ 236 goto top; 237 } 238 239 sc = str_to_sv(lexeme, sv); 240 if (sc < 2) 241 goto top; 242 243 file_name = sym_enter(sv[1]); 244 line_number = atoi(sv[0]); /* for next input line */ 245 lex_at_bol = 1; 246 goto top; 247 } 248 249 lex_at_bol = 0; 250 251 /* 252 * Skip white space 253 */ 254 if (is_white(c)) 255 goto top; 256 257 /* 258 * Symbol? Might be a keyword or just an identifier 259 */ 260 if (is_sstart(c)) { 261 /* we got a symbol */ 262 do { 263 *p++ = c; 264 c = getch(fp); 265 } while (is_sfollow(c)); 266 (void) ungetc(c, fp); 267 *p = 0; 268 269 sym = sym_enter(lexeme); 270 271 yylval = &sym->s_node; 272 273 if (sym->kw) { 274 return (sym->kw->token); 275 } else { 276 return (IDENTIFIER); 277 } 278 } 279 280 /* 281 * Integer constant? 282 */ 283 if (is_digit(c)) { 284 /* we got a number */ 285 *p++ = c; 286 if (c == '0') { 287 c = getch(fp); 288 if (c == 'x' || c == 'X') { 289 /* handle hex specially */ 290 do { 291 *p++ = c; 292 c = getch(fp); 293 } while (is_xdigit(c)); 294 goto convert_icon; 295 } else if (c == 'b' || c == 'B' || 296 c == 'd' || c == 'D' || 297 c == 'o' || c == 'O') { 298 do { 299 *p++ = c; 300 c = getch(fp); 301 } while (is_digit(c)); 302 goto convert_icon; 303 } 304 (void) ungetc(c, fp); 305 } 306 /* could be anything */ 307 c = getch(fp); 308 while (is_digit(c)) { 309 *p++ = c; 310 c = getch(fp); 311 } 312 313 convert_icon: 314 *p = 0; 315 (void) ungetc(c, fp); 316 317 intg = int_enter(strtol(lexeme, 0, 0)); 318 yylval = &intg->s_node; 319 320 return (INTEGER); 321 } 322 323 /* Could handle strings. We don't seem to need them yet */ 324 325 yylval = 0; /* operator tokens have no value */ 326 xc = getch(fp); /* get look-ahead for two-char lexemes */ 327 328 lexeme[0] = c; 329 lexeme[1] = xc; 330 lexeme[2] = 0; 331 332 /* 333 * Look for to-end-of-line comment 334 */ 335 if (c == '/' && xc == '/') { 336 /* eat the comment */ 337 while ((c = getch(fp)) != EOF && c != '\n') 338 ; 339 (void) ungetc(c, fp); /* put back newline */ 340 goto top; 341 } 342 343 /* 344 * Look for multi-line comment 345 */ 346 if (c == '/' && xc == '*') { 347 /* eat the comment */ 348 xc = -1; 349 while ((c = getch(fp)) != EOF) { 350 if (xc == '*' && c == '/') { 351 /* that's it */ 352 break; 353 } 354 xc = c; 355 if (c == '\n') 356 line_number++; 357 } 358 goto top; 359 } 360 361 /* 362 * Use symbol table lookup for two-character and 363 * one character operator tokens. 364 */ 365 sym = sym_find(lexeme); 366 if (sym) { 367 /* there better be a keyword attached */ 368 yylval = &sym->s_node; 369 return (sym->kw->token); 370 } 371 372 /* Try a one-character form */ 373 (void) ungetc(xc, fp); 374 lexeme[1] = 0; 375 sym = sym_find(lexeme); 376 if (sym) { 377 /* there better be a keyword attached */ 378 yylval = &sym->s_node; 379 return (sym->kw->token); 380 } 381 382 if (is_between(c, ' ', '~')) 383 compile_error("unrecognized character: 0x%02x (%c)", c, c); 384 else 385 compile_error("unrecognized character: 0x%02x", c); 386 goto top; 387 } 388 389 static ndr_symbol_t * 390 sym_find(char *name) 391 { 392 ndr_symbol_t **pp; 393 ndr_symbol_t *p; 394 395 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 396 if (strcmp(p->name, name) == 0) 397 return (p); 398 } 399 400 return (0); 401 } 402 403 static ndr_symbol_t * 404 sym_enter(char *name) 405 { 406 ndr_symbol_t **pp; 407 ndr_symbol_t *p; 408 409 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 410 if (strcmp(p->name, name) == 0) 411 return (p); 412 } 413 414 p = ndr_alloc(1, sizeof (ndr_symbol_t)); 415 416 if ((p->name = strdup(name)) == NULL) 417 fatal_error("%s", strerror(ENOMEM)); 418 419 p->s_node.label = IDENTIFIER; 420 p->s_node.n_sym = p; 421 422 *pp = p; 423 424 return (p); 425 } 426 427 static ndr_integer_t * 428 int_enter(long value) 429 { 430 ndr_integer_t **pp; 431 ndr_integer_t *p; 432 433 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) { 434 if (p->value == value) 435 return (p); 436 } 437 438 p = ndr_alloc(1, sizeof (ndr_integer_t)); 439 440 p->value = value; 441 p->s_node.label = INTEGER; 442 p->s_node.n_int = value; 443 444 *pp = p; 445 446 return (p); 447 } 448 449 void * 450 ndr_alloc(size_t nelem, size_t elsize) 451 { 452 void *p; 453 454 if ((p = calloc(nelem, elsize)) == NULL) { 455 fatal_error("%s", strerror(ENOMEM)); 456 /* NOTREACHED */ 457 } 458 459 return (p); 460 } 461 462 /* 463 * The input context (filename, line number) is maintained by the 464 * lexical analysis, and we generally want such info reported for 465 * errors in a consistent manner. 466 */ 467 void 468 compile_error(const char *fmt, ...) 469 { 470 char buf[NDLBUFSZ]; 471 va_list ap; 472 473 va_start(ap, fmt); 474 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 475 va_end(ap); 476 477 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n", 478 file_name->name, line_number, buf); 479 480 n_compile_error++; 481 } 482 483 void 484 fatal_error(const char *fmt, ...) 485 { 486 char buf[NDLBUFSZ]; 487 va_list ap; 488 489 va_start(ap, fmt); 490 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 491 va_end(ap); 492 493 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf); 494 exit(1); 495 } 496 497 /* 498 * Setup nodes for the lexical analyzer. 499 */ 500 struct node * 501 n_cons(int label, ...) 502 { 503 ndr_node_t *np; 504 va_list ap; 505 506 np = ndr_alloc(1, sizeof (ndr_node_t)); 507 508 va_start(ap, label); 509 np->label = label; 510 np->n_arg[0] = va_arg(ap, void *); 511 np->n_arg[1] = va_arg(ap, void *); 512 np->n_arg[2] = va_arg(ap, void *); 513 va_end(ap); 514 515 np->line_number = line_number; 516 np->file_name = file_name; 517 518 return (np); 519 } 520 521 /* 522 * list: item 523 * | list item ={ n_splice($1, $2); } 524 * ; 525 */ 526 void 527 n_splice(struct node *np1, struct node *np2) 528 { 529 while (np1->n_next) 530 np1 = np1->n_next; 531 532 np1->n_next = np2; 533 } 534 535 /* 536 * Convert a string of words to a vector of strings. 537 * Returns the number of words. 538 */ 539 static int 540 str_to_sv(char *buf, char *sv[]) 541 { 542 char **pp = sv; 543 char *p = buf; 544 char *q = buf; 545 int in_word = 0; 546 int c; 547 548 for (;;) { 549 c = *p++; 550 if (c == 0) 551 break; 552 553 if (!in_word) { 554 if (iswhite(c)) 555 continue; 556 557 *pp++ = q; 558 in_word = 1; 559 } 560 561 if (isquote(c)) { 562 int qc = c; 563 564 while (((c = *p++) != 0) && (c != qc)) 565 *q++ = c; 566 if (c == 0) 567 break; 568 } else if (iswhite(c)) { 569 /* end of word */ 570 *q++ = 0; 571 in_word = 0; 572 } else { 573 /* still inside word */ 574 *q++ = c; 575 } 576 } 577 578 if (in_word) 579 *q++ = 0; 580 581 *pp = (char *)0; 582 return (pp - sv); 583 } 584