1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <errno.h> 28 #include <stdarg.h> 29 #include "ndrgen.h" 30 #include "y.tab.h" 31 32 /* 33 * C-like lexical analysis. 34 * 35 * 1. Define a "struct node" 36 * 2. Define a "struct symbol" that encapsulates a struct node. 37 * 3. Define a "struct integer" that encapsulates a struct node. 38 * 4. Set the YACC stack type in the grammar: 39 * %{ 40 * #define YYSTYPE struct node * 41 * %} 42 * 5. Define %token's in the grammer for IDENTIFIER, STRING and INTEGER. 43 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is 44 * "%token STRUCT_KW": 45 * // atomic values 46 * %token INTEGER STRING IDENTIFIER 47 * // keywords 48 * %token STRUCT_KW CASE_KW 49 * // operators 50 * %token PLUS MINUS ASSIGN ARROW 51 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...) 52 * %token INCOP RELOP EQUOP ASSOP 53 * 6. It's easiest to use the yacc(1) generated token numbers for node 54 * labels. For node labels that are not actually part of the grammer, 55 * use a %token with an L_ prefix: 56 * // node labels (can't be generated by lex) 57 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ 58 * 7. Call set_lex_input() before parsing. 59 */ 60 61 #define SQ '\'' 62 #define DQ '"' 63 64 #define isquote(c) ((c) == SQ || (c) == DQ) 65 #define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f') 66 67 #define is_between(c, l, u) ((l) <= (c) && (c) <= (u)) 68 #define is_white(c) ((c) == ' ' || c == '\r' || c == '\t' || c == '\f') 69 #define is_lower(c) is_between((c), 'a', 'z') 70 #define is_upper(c) is_between((c), 'A', 'Z') 71 #define is_alpha(c) (is_lower(c) || is_upper(c)) 72 #define is_digit(c) is_between((c), '0', '9') 73 #define is_sstart(c) (is_alpha(c) || (c) == '_') 74 #define is_sfollow(c) (is_sstart(c) || is_digit(c)) 75 #define is_xdigit(c) \ 76 (is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f')) 77 78 ndr_symbol_t *symbol_list; 79 static ndr_integer_t *integer_list; 80 static FILE *lex_infp; 81 static ndr_symbol_t *file_name; 82 int line_number; 83 int n_compile_error; 84 85 static int lex_at_bol; 86 87 /* In yacc(1) generated parser */ 88 extern struct node *yylval; 89 90 /* 91 * The keywtab[] and optable[] could be external to this lex 92 * and it would all still work. 93 */ 94 static ndr_keyword_t keywtable[] = { 95 { "struct", STRUCT_KW, 0 }, 96 { "union", UNION_KW, 0 }, 97 { "typedef", TYPEDEF_KW, 0 }, 98 99 { "interface", INTERFACE_KW, 0 }, 100 { "uuid", UUID_KW, 0 }, 101 { "_no_reorder", _NO_REORDER_KW, 0 }, 102 { "extern", EXTERN_KW, 0 }, 103 { "reference", REFERENCE_KW, 0 }, 104 105 { "align", ALIGN_KW, 0 }, 106 { "operation", OPERATION_KW, 0 }, 107 { "in", IN_KW, 0 }, 108 { "out", OUT_KW, 0 }, 109 110 { "string", STRING_KW, 0 }, 111 { "size_is", SIZE_IS_KW, 0 }, 112 { "length_is", LENGTH_IS_KW, 0 }, 113 114 { "switch_is", SWITCH_IS_KW, 0 }, 115 { "case", CASE_KW, 0 }, 116 { "default", DEFAULT_KW, 0 }, 117 118 { "transmit_as", TRANSMIT_AS_KW, 0 }, 119 { "arg_is", ARG_IS_KW, 0 }, 120 121 { "char", BASIC_TYPE, 1 }, 122 { "uchar", BASIC_TYPE, 1 }, 123 { "wchar", BASIC_TYPE, 2 }, 124 { "short", BASIC_TYPE, 2 }, 125 { "ushort", BASIC_TYPE, 2 }, 126 { "long", BASIC_TYPE, 4 }, 127 { "ulong", BASIC_TYPE, 4 }, 128 {0} 129 }; 130 131 static ndr_keyword_t optable[] = { 132 { "{", LC, 0 }, 133 { "}", RC, 0 }, 134 { "(", LP, 0 }, 135 { ")", RP, 0 }, 136 { "[", LB, 0 }, 137 { "]", RB, 0 }, 138 { "*", STAR, 0 }, 139 { "/", DIV, 0 }, 140 { "%", MOD, 0 }, 141 { "-", MINUS, 0 }, 142 { "+", PLUS, 0 }, 143 { "&", AND, 0 }, 144 { "|", OR, 0 }, 145 { "^", XOR, 0 }, 146 { ";", SEMI, 0 }, 147 {0} 148 }; 149 150 static int getch(FILE *fp); 151 static ndr_integer_t *int_enter(long); 152 static ndr_symbol_t *sym_enter(char *); 153 static ndr_symbol_t *sym_find(char *); 154 static int str_to_sv(char *, char *sv[]); 155 156 /* 157 * Enter the symbols for keyword. 158 */ 159 static void 160 keyw_tab_init(ndr_keyword_t kwtable[]) 161 { 162 int i; 163 ndr_keyword_t *kw; 164 ndr_symbol_t *sym; 165 166 for (i = 0; kwtable[i].name; i++) { 167 kw = &kwtable[i]; 168 169 sym = sym_enter(kw->name); 170 sym->kw = kw; 171 } 172 } 173 174 void 175 set_lex_input(FILE *fp, char *name) 176 { 177 keyw_tab_init(keywtable); 178 keyw_tab_init(optable); 179 180 lex_infp = fp; 181 file_name = sym_enter(name); 182 line_number = 1; 183 lex_at_bol = 1; 184 } 185 186 static int 187 getch(FILE *fp) 188 { 189 return (getc(fp)); 190 } 191 192 int 193 yylex(void) 194 { 195 char lexeme[512]; 196 char *p = lexeme; 197 FILE *fp = lex_infp; 198 int c, xc; 199 ndr_symbol_t *sym; 200 ndr_integer_t *intg; 201 202 top: 203 p = lexeme; 204 205 c = getch(fp); 206 if (c == EOF) 207 return (EOF); 208 209 if (c == '\n') { 210 line_number++; 211 lex_at_bol = 1; 212 goto top; 213 } 214 215 /* 216 * Handle preprocessor lines. This just notes 217 * which file we're processing. 218 */ 219 if (c == '#' && lex_at_bol) { 220 char *sv[10]; 221 int sc; 222 223 while ((c = getch(fp)) != EOF && c != '\n') 224 *p++ = c; 225 226 *p = 0; 227 /* note: no ungetc() of newline, we don't want to count it */ 228 229 if (*lexeme != ' ') { 230 /* not a line we know */ 231 goto top; 232 } 233 234 sc = str_to_sv(lexeme, sv); 235 if (sc < 2) 236 goto top; 237 238 file_name = sym_enter(sv[1]); 239 line_number = atoi(sv[0]); /* for next input line */ 240 lex_at_bol = 1; 241 goto top; 242 } 243 244 lex_at_bol = 0; 245 246 /* 247 * Skip white space 248 */ 249 if (is_white(c)) 250 goto top; 251 252 /* 253 * Symbol? Might be a keyword or just an identifier 254 */ 255 if (is_sstart(c)) { 256 /* we got a symbol */ 257 do { 258 *p++ = c; 259 c = getch(fp); 260 } while (is_sfollow(c)); 261 (void) ungetc(c, fp); 262 *p = 0; 263 264 sym = sym_enter(lexeme); 265 266 yylval = &sym->s_node; 267 268 if (sym->kw) { 269 return (sym->kw->token); 270 } else { 271 return (IDENTIFIER); 272 } 273 } 274 275 /* 276 * Integer constant? 277 */ 278 if (is_digit(c)) { 279 /* we got a number */ 280 *p++ = c; 281 if (c == '0') { 282 c = getch(fp); 283 if (c == 'x' || c == 'X') { 284 /* handle hex specially */ 285 do { 286 *p++ = c; 287 c = getch(fp); 288 } while (is_xdigit(c)); 289 goto convert_icon; 290 } else if (c == 'b' || c == 'B' || 291 c == 'd' || c == 'D' || 292 c == 'o' || c == 'O') { 293 do { 294 *p++ = c; 295 c = getch(fp); 296 } while (is_digit(c)); 297 goto convert_icon; 298 } 299 (void) ungetc(c, fp); 300 } 301 /* could be anything */ 302 c = getch(fp); 303 while (is_digit(c)) { 304 *p++ = c; 305 c = getch(fp); 306 } 307 308 convert_icon: 309 *p = 0; 310 (void) ungetc(c, fp); 311 312 intg = int_enter(strtol(lexeme, 0, 0)); 313 yylval = &intg->s_node; 314 315 return (INTEGER); 316 } 317 318 /* Could handle strings. We don't seem to need them yet */ 319 320 yylval = 0; /* operator tokens have no value */ 321 xc = getch(fp); /* get look-ahead for two-char lexemes */ 322 323 lexeme[0] = c; 324 lexeme[1] = xc; 325 lexeme[2] = 0; 326 327 /* 328 * Look for to-end-of-line comment 329 */ 330 if (c == '/' && xc == '/') { 331 /* eat the comment */ 332 while ((c = getch(fp)) != EOF && c != '\n') 333 ; 334 (void) ungetc(c, fp); /* put back newline */ 335 goto top; 336 } 337 338 /* 339 * Look for multi-line comment 340 */ 341 if (c == '/' && xc == '*') { 342 /* eat the comment */ 343 xc = -1; 344 while ((c = getch(fp)) != EOF) { 345 if (xc == '*' && c == '/') { 346 /* that's it */ 347 break; 348 } 349 xc = c; 350 if (c == '\n') 351 line_number++; 352 } 353 goto top; 354 } 355 356 /* 357 * Use symbol table lookup for two-character and 358 * one character operator tokens. 359 */ 360 sym = sym_find(lexeme); 361 if (sym) { 362 /* there better be a keyword attached */ 363 yylval = &sym->s_node; 364 return (sym->kw->token); 365 } 366 367 /* Try a one-character form */ 368 (void) ungetc(xc, fp); 369 lexeme[1] = 0; 370 sym = sym_find(lexeme); 371 if (sym) { 372 /* there better be a keyword attached */ 373 yylval = &sym->s_node; 374 return (sym->kw->token); 375 } 376 377 if (is_between(c, ' ', '~')) 378 compile_error("unrecognized character: 0x%02x (%c)", c, c); 379 else 380 compile_error("unrecognized character: 0x%02x", c); 381 goto top; 382 } 383 384 static ndr_symbol_t * 385 sym_find(char *name) 386 { 387 ndr_symbol_t **pp; 388 ndr_symbol_t *p; 389 390 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 391 if (strcmp(p->name, name) == 0) 392 return (p); 393 } 394 395 return (0); 396 } 397 398 static ndr_symbol_t * 399 sym_enter(char *name) 400 { 401 ndr_symbol_t **pp; 402 ndr_symbol_t *p; 403 404 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 405 if (strcmp(p->name, name) == 0) 406 return (p); 407 } 408 409 p = ndr_alloc(1, sizeof (ndr_symbol_t)); 410 411 if ((p->name = strdup(name)) == NULL) 412 fatal_error("%s", strerror(ENOMEM)); 413 414 p->s_node.label = IDENTIFIER; 415 p->s_node.n_sym = p; 416 417 *pp = p; 418 419 return (p); 420 } 421 422 static ndr_integer_t * 423 int_enter(long value) 424 { 425 ndr_integer_t **pp; 426 ndr_integer_t *p; 427 428 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) { 429 if (p->value == value) 430 return (p); 431 } 432 433 p = ndr_alloc(1, sizeof (ndr_integer_t)); 434 435 p->value = value; 436 p->s_node.label = INTEGER; 437 p->s_node.n_int = value; 438 439 *pp = p; 440 441 return (p); 442 } 443 444 void * 445 ndr_alloc(size_t nelem, size_t elsize) 446 { 447 void *p; 448 449 if ((p = calloc(nelem, elsize)) == NULL) { 450 fatal_error("%s", strerror(ENOMEM)); 451 /* NOTREACHED */ 452 } 453 454 return (p); 455 } 456 457 /* 458 * The input context (filename, line number) is maintained by the 459 * lexical analysis, and we generally want such info reported for 460 * errors in a consistent manner. 461 */ 462 void 463 compile_error(const char *fmt, ...) 464 { 465 char buf[NDLBUFSZ]; 466 va_list ap; 467 468 va_start(ap, fmt); 469 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 470 va_end(ap); 471 472 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n", 473 file_name->name, line_number, buf); 474 475 n_compile_error++; 476 } 477 478 void 479 fatal_error(const char *fmt, ...) 480 { 481 char buf[NDLBUFSZ]; 482 va_list ap; 483 484 va_start(ap, fmt); 485 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 486 va_end(ap); 487 488 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf); 489 exit(1); 490 } 491 492 /* 493 * Setup nodes for the lexical analyzer. 494 */ 495 struct node * 496 n_cons(int label, ...) 497 { 498 ndr_node_t *np; 499 va_list ap; 500 501 np = ndr_alloc(1, sizeof (ndr_node_t)); 502 503 va_start(ap, label); 504 np->label = label; 505 np->n_arg[0] = va_arg(ap, void *); 506 np->n_arg[1] = va_arg(ap, void *); 507 np->n_arg[2] = va_arg(ap, void *); 508 va_end(ap); 509 510 np->line_number = line_number; 511 np->file_name = file_name; 512 513 return (np); 514 } 515 516 /* 517 * list: item 518 * | list item ={ n_splice($1, $2); } 519 * ; 520 */ 521 void 522 n_splice(struct node *np1, struct node *np2) 523 { 524 while (np1->n_next) 525 np1 = np1->n_next; 526 527 np1->n_next = np2; 528 } 529 530 /* 531 * Convert a string of words to a vector of strings. 532 * Returns the number of words. 533 */ 534 static int 535 str_to_sv(char *buf, char *sv[]) 536 { 537 char **pp = sv; 538 char *p = buf; 539 char *q = buf; 540 int in_word = 0; 541 int c; 542 543 for (;;) { 544 c = *p++; 545 if (c == 0) 546 break; 547 548 if (!in_word) { 549 if (iswhite(c)) 550 continue; 551 552 *pp++ = q; 553 in_word = 1; 554 } 555 556 if (isquote(c)) { 557 int qc = c; 558 559 while (((c = *p++) != 0) && (c != qc)) 560 *q++ = c; 561 if (c == 0) 562 break; 563 } else if (iswhite(c)) { 564 /* end of word */ 565 *q++ = 0; 566 in_word = 0; 567 } else { 568 /* still inside word */ 569 *q++ = c; 570 } 571 } 572 573 if (in_word) 574 *q++ = 0; 575 576 *pp = (char *)0; 577 return (pp - sv); 578 } 579