1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <errno.h> 30 #include <stdarg.h> 31 #include "ndrgen.h" 32 #include "y.tab.h" 33 34 /* 35 * C-like lexical analysis. 36 * 37 * 1. Define a "struct node" 38 * 2. Define a "struct symbol" that encapsulates a struct node. 39 * 3. Define a "struct integer" that encapsulates a struct node. 40 * 4. Set the YACC stack type in the grammar: 41 * %{ 42 * #define YYSTYPE struct node * 43 * %} 44 * 5. Define %token's in the grammer for IDENTIFIER, STRING and INTEGER. 45 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is 46 * "%token STRUCT_KW": 47 * // atomic values 48 * %token INTEGER STRING IDENTIFIER 49 * // keywords 50 * %token STRUCT_KW CASE_KW 51 * // operators 52 * %token PLUS MINUS ASSIGN ARROW 53 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...) 54 * %token INCOP RELOP EQUOP ASSOP 55 * 6. It's easiest to use the yacc(1) generated token numbers for node 56 * labels. For node labels that are not actually part of the grammer, 57 * use a %token with an L_ prefix: 58 * // node labels (can't be generated by lex) 59 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ 60 * 7. Call set_lex_input() before parsing. 61 */ 62 63 #define SQ '\'' 64 #define DQ '"' 65 66 #define isquote(c) ((c) == SQ || (c) == DQ) 67 #define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f') 68 69 #define is_between(c, l, u) ((l) <= (c) && (c) <= (u)) 70 #define is_white(c) ((c) == ' ' || c == '\r' || c == '\t' || c == '\f') 71 #define is_lower(c) is_between((c), 'a', 'z') 72 #define is_upper(c) is_between((c), 'A', 'Z') 73 #define is_alpha(c) (is_lower(c) || is_upper(c)) 74 #define is_digit(c) is_between((c), '0', '9') 75 #define is_sstart(c) (is_alpha(c) || (c) == '_') 76 #define is_sfollow(c) (is_sstart(c) || is_digit(c)) 77 #define is_xdigit(c) \ 78 (is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f')) 79 80 ndr_symbol_t *symbol_list; 81 static ndr_integer_t *integer_list; 82 static FILE *lex_infp; 83 static ndr_symbol_t *file_name; 84 int line_number; 85 int n_compile_error; 86 87 static int lex_at_bol; 88 89 /* In yacc(1) generated parser */ 90 extern struct node *yylval; 91 92 /* 93 * The keywtab[] and optable[] could be external to this lex 94 * and it would all still work. 95 */ 96 static ndr_keyword_t keywtable[] = { 97 { "struct", STRUCT_KW, 0 }, 98 { "union", UNION_KW, 0 }, 99 { "typedef", TYPEDEF_KW, 0 }, 100 101 { "interface", INTERFACE_KW, 0 }, 102 { "uuid", UUID_KW, 0 }, 103 { "_no_reorder", _NO_REORDER_KW, 0 }, 104 { "extern", EXTERN_KW, 0 }, 105 { "reference", REFERENCE_KW, 0 }, 106 107 { "align", ALIGN_KW, 0 }, 108 { "operation", OPERATION_KW, 0 }, 109 { "in", IN_KW, 0 }, 110 { "out", OUT_KW, 0 }, 111 112 { "string", STRING_KW, 0 }, 113 { "size_is", SIZE_IS_KW, 0 }, 114 { "length_is", LENGTH_IS_KW, 0 }, 115 116 { "switch_is", SWITCH_IS_KW, 0 }, 117 { "case", CASE_KW, 0 }, 118 { "default", DEFAULT_KW, 0 }, 119 120 { "transmit_as", TRANSMIT_AS_KW, 0 }, 121 { "arg_is", ARG_IS_KW, 0 }, 122 123 { "char", BASIC_TYPE, 1 }, 124 { "uchar", BASIC_TYPE, 1 }, 125 { "wchar", BASIC_TYPE, 2 }, 126 { "short", BASIC_TYPE, 2 }, 127 { "ushort", BASIC_TYPE, 2 }, 128 { "long", BASIC_TYPE, 4 }, 129 { "ulong", BASIC_TYPE, 4 }, 130 {0} 131 }; 132 133 static ndr_keyword_t optable[] = { 134 { "{", LC, 0 }, 135 { "}", RC, 0 }, 136 { "(", LP, 0 }, 137 { ")", RP, 0 }, 138 { "[", LB, 0 }, 139 { "]", RB, 0 }, 140 { "*", STAR, 0 }, 141 { ";", SEMI, 0 }, 142 {0} 143 }; 144 145 static int getch(FILE *fp); 146 static ndr_integer_t *int_enter(long); 147 static ndr_symbol_t *sym_find(char *); 148 static int str_to_sv(char *, char *sv[]); 149 150 /* 151 * Enter the symbols for keyword. 152 */ 153 static void 154 keyw_tab_init(ndr_keyword_t kwtable[]) 155 { 156 int i; 157 ndr_keyword_t *kw; 158 ndr_symbol_t *sym; 159 160 for (i = 0; kwtable[i].name; i++) { 161 kw = &kwtable[i]; 162 163 sym = sym_enter(kw->name); 164 sym->kw = kw; 165 } 166 } 167 168 void 169 set_lex_input(FILE *fp, char *name) 170 { 171 keyw_tab_init(keywtable); 172 keyw_tab_init(optable); 173 174 lex_infp = fp; 175 file_name = sym_enter(name); 176 line_number = 1; 177 lex_at_bol = 1; 178 } 179 180 static int 181 getch(FILE *fp) 182 { 183 return (getc(fp)); 184 } 185 186 int 187 yylex(void) 188 { 189 char lexeme[512]; 190 char *p = lexeme; 191 FILE *fp = lex_infp; 192 int c, xc; 193 ndr_symbol_t *sym; 194 ndr_integer_t *intg; 195 196 top: 197 p = lexeme; 198 199 c = getch(fp); 200 if (c == EOF) 201 return (EOF); 202 203 if (c == '\n') { 204 line_number++; 205 lex_at_bol = 1; 206 goto top; 207 } 208 209 /* 210 * Handle preprocessor lines. This just notes 211 * which file we're processing. 212 */ 213 if (c == '#' && lex_at_bol) { 214 char *sv[10]; 215 int sc; 216 217 while ((c = getch(fp)) != EOF && c != '\n') 218 *p++ = c; 219 220 *p = 0; 221 /* note: no ungetc() of newline, we don't want to count it */ 222 223 if (*lexeme != ' ') { 224 /* not a line we know */ 225 goto top; 226 } 227 228 sc = str_to_sv(lexeme, sv); 229 if (sc < 2) 230 goto top; 231 232 file_name = sym_enter(sv[1]); 233 line_number = atoi(sv[0]); /* for next input line */ 234 lex_at_bol = 1; 235 goto top; 236 } 237 238 lex_at_bol = 0; 239 240 /* 241 * Skip white space 242 */ 243 if (is_white(c)) 244 goto top; 245 246 /* 247 * Symbol? Might be a keyword or just an identifier 248 */ 249 if (is_sstart(c)) { 250 /* we got a symbol */ 251 do { 252 *p++ = c; 253 c = getch(fp); 254 } while (is_sfollow(c)); 255 (void) ungetc(c, fp); 256 *p = 0; 257 258 sym = sym_enter(lexeme); 259 260 yylval = &sym->s_node; 261 262 if (sym->kw) { 263 return (sym->kw->token); 264 } else { 265 return (IDENTIFIER); 266 } 267 } 268 269 /* 270 * Integer constant? 271 */ 272 if (is_digit(c)) { 273 /* we got a number */ 274 *p++ = c; 275 if (c == '0') { 276 c = getch(fp); 277 if (c == 'x' || c == 'X') { 278 /* handle hex specially */ 279 do { 280 *p++ = c; 281 c = getch(fp); 282 } while (is_xdigit(c)); 283 goto convert_icon; 284 } else if (c == 'b' || c == 'B' || 285 c == 'd' || c == 'D' || 286 c == 'o' || c == 'O') { 287 do { 288 *p++ = c; 289 c = getch(fp); 290 } while (is_digit(c)); 291 goto convert_icon; 292 } 293 (void) ungetc(c, fp); 294 } 295 /* could be anything */ 296 c = getch(fp); 297 while (is_digit(c)) { 298 *p++ = c; 299 c = getch(fp); 300 } 301 302 convert_icon: 303 *p = 0; 304 (void) ungetc(c, fp); 305 306 intg = int_enter(strtol(lexeme, 0, 0)); 307 yylval = &intg->s_node; 308 309 return (INTEGER); 310 } 311 312 /* Could handle strings. We don't seem to need them yet */ 313 314 yylval = 0; /* operator tokens have no value */ 315 xc = getch(fp); /* get look-ahead for two-char lexemes */ 316 317 lexeme[0] = c; 318 lexeme[1] = xc; 319 lexeme[2] = 0; 320 321 /* 322 * Look for to-end-of-line comment 323 */ 324 if (c == '/' && xc == '/') { 325 /* eat the comment */ 326 while ((c = getch(fp)) != EOF && c != '\n') 327 ; 328 (void) ungetc(c, fp); /* put back newline */ 329 goto top; 330 } 331 332 /* 333 * Look for multi-line comment 334 */ 335 if (c == '/' && xc == '*') { 336 /* eat the comment */ 337 xc = -1; 338 while ((c = getch(fp)) != EOF) { 339 if (xc == '*' && c == '/') { 340 /* that's it */ 341 break; 342 } 343 xc = c; 344 if (c == '\n') 345 line_number++; 346 } 347 goto top; 348 } 349 350 /* 351 * Use symbol table lookup for two-character and 352 * one character operator tokens. 353 */ 354 sym = sym_find(lexeme); 355 if (sym) { 356 /* there better be a keyword attached */ 357 yylval = &sym->s_node; 358 return (sym->kw->token); 359 } 360 361 /* Try a one-character form */ 362 (void) ungetc(xc, fp); 363 lexeme[1] = 0; 364 sym = sym_find(lexeme); 365 if (sym) { 366 /* there better be a keyword attached */ 367 yylval = &sym->s_node; 368 return (sym->kw->token); 369 } 370 371 compile_error("unrecognized character 0x%02x", c); 372 goto top; 373 } 374 375 static ndr_symbol_t * 376 sym_find(char *name) 377 { 378 ndr_symbol_t **pp; 379 ndr_symbol_t *p; 380 381 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 382 if (strcmp(p->name, name) == 0) 383 return (p); 384 } 385 386 return (0); 387 } 388 389 ndr_symbol_t * 390 sym_enter(char *name) 391 { 392 ndr_symbol_t **pp; 393 ndr_symbol_t *p; 394 395 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { 396 if (strcmp(p->name, name) == 0) 397 return (p); 398 } 399 400 p = ndr_alloc(1, sizeof (ndr_symbol_t)); 401 402 if ((p->name = strdup(name)) == NULL) 403 fatal_error("%s", strerror(ENOMEM)); 404 405 p->s_node.label = IDENTIFIER; 406 p->s_node.n_sym = p; 407 408 *pp = p; 409 410 return (p); 411 } 412 413 static ndr_integer_t * 414 int_enter(long value) 415 { 416 ndr_integer_t **pp; 417 ndr_integer_t *p; 418 419 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) { 420 if (p->value == value) 421 return (p); 422 } 423 424 p = ndr_alloc(1, sizeof (ndr_integer_t)); 425 426 p->value = value; 427 p->s_node.label = INTEGER; 428 p->s_node.n_int = value; 429 430 *pp = p; 431 432 return (p); 433 } 434 435 void * 436 ndr_alloc(size_t nelem, size_t elsize) 437 { 438 void *p; 439 440 if ((p = calloc(nelem, elsize)) == NULL) { 441 fatal_error("%s", strerror(ENOMEM)); 442 /* NOTREACHED */ 443 } 444 445 return (p); 446 } 447 448 /* 449 * The input context (filename, line number) is maintained by the 450 * lexical analysis, and we generally want such info reported for 451 * errors in a consistent manner. 452 */ 453 void 454 compile_error(const char *fmt, ...) 455 { 456 char buf[NDLBUFSZ]; 457 va_list ap; 458 459 va_start(ap, fmt); 460 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 461 va_end(ap); 462 463 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n", 464 file_name->name, line_number, buf); 465 466 n_compile_error++; 467 } 468 469 void 470 fatal_error(const char *fmt, ...) 471 { 472 char buf[NDLBUFSZ]; 473 va_list ap; 474 475 va_start(ap, fmt); 476 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); 477 va_end(ap); 478 479 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf); 480 exit(1); 481 } 482 483 /* 484 * Setup nodes for the lexical analyzer. 485 */ 486 struct node * 487 n_cons(int label, ...) 488 { 489 ndr_node_t *np; 490 va_list ap; 491 492 np = ndr_alloc(1, sizeof (ndr_node_t)); 493 494 va_start(ap, label); 495 np->label = label; 496 np->n_arg[0] = va_arg(ap, void *); 497 np->n_arg[1] = va_arg(ap, void *); 498 np->n_arg[2] = va_arg(ap, void *); 499 va_end(ap); 500 501 np->line_number = line_number; 502 np->file_name = file_name; 503 504 return (np); 505 } 506 507 /* 508 * list: item 509 * | list item ={ n_splice($1, $2); } 510 * ; 511 */ 512 void 513 n_splice(struct node *np1, struct node *np2) 514 { 515 while (np1->n_next) 516 np1 = np1->n_next; 517 518 np1->n_next = np2; 519 } 520 521 /* 522 * Convert a string of words to a vector of strings. 523 * Returns the number of words. 524 */ 525 static int 526 str_to_sv(char *buf, char *sv[]) 527 { 528 char **pp = sv; 529 char *p = buf; 530 char *q = buf; 531 int in_word = 0; 532 int c; 533 534 for (;;) { 535 c = *p++; 536 if (c == 0) 537 break; 538 539 if (!in_word) { 540 if (iswhite(c)) 541 continue; 542 543 *pp++ = q; 544 in_word = 1; 545 } 546 547 if (isquote(c)) { 548 int qc = c; 549 550 while (((c = *p++) != 0) && (c != qc)) 551 *q++ = c; 552 if (c == 0) 553 break; 554 } else if (iswhite(c)) { 555 /* end of word */ 556 *q++ = 0; 557 in_word = 0; 558 } else { 559 /* still inside word */ 560 *q++ = c; 561 } 562 } 563 564 if (in_word) 565 *q++ = 0; 566 567 *pp = (char *)0; 568 return (pp - sv); 569 } 570