1 /*- 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <err.h> 51 #include <stdio.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include "indent_globs.h" 56 #include "indent_codes.h" 57 #include "indent.h" 58 59 #define alphanum 1 60 #ifdef undef 61 #define opchar 3 62 #endif 63 64 struct templ { 65 const char *rwd; 66 int rwcode; 67 }; 68 69 /* 70 * This table has to be sorted alphabetically, because it'll be used in binary 71 * search. For the same reason, string must be the first thing in struct templ. 72 */ 73 struct templ specials[] = 74 { 75 {"auto", 10}, 76 {"break", 9}, 77 {"case", 8}, 78 {"char", 4}, 79 {"const", 4}, 80 {"default", 8}, 81 {"do", 6}, 82 {"double", 4}, 83 {"else", 6}, 84 {"enum", 3}, 85 {"extern", 10}, 86 {"float", 4}, 87 {"for", 5}, 88 {"global", 4}, 89 {"goto", 9}, 90 {"if", 5}, 91 {"int", 4}, 92 {"long", 4}, 93 {"offsetof", 1}, 94 {"register", 10}, 95 {"return", 9}, 96 {"short", 4}, 97 {"sizeof", 2}, 98 {"static", 10}, 99 {"struct", 3}, 100 {"switch", 7}, 101 {"typedef", 10}, 102 {"union", 3}, 103 {"unsigned", 4}, 104 {"void", 4}, 105 {"volatile", 4}, 106 {"while", 5} 107 }; 108 109 const char **typenames; 110 int typename_count; 111 int typename_top = -1; 112 113 char chartype[128] = 114 { /* this is used to facilitate the decision of 115 * what type (alphanumeric, operator) each 116 * character is */ 117 0, 0, 0, 0, 0, 0, 0, 0, 118 0, 0, 0, 0, 0, 0, 0, 0, 119 0, 0, 0, 0, 0, 0, 0, 0, 120 0, 0, 0, 0, 0, 0, 0, 0, 121 0, 3, 0, 0, 1, 3, 3, 0, 122 0, 0, 3, 3, 0, 3, 0, 3, 123 1, 1, 1, 1, 1, 1, 1, 1, 124 1, 1, 0, 0, 3, 3, 3, 3, 125 0, 1, 1, 1, 1, 1, 1, 1, 126 1, 1, 1, 1, 1, 1, 1, 1, 127 1, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 0, 0, 0, 3, 1, 129 0, 1, 1, 1, 1, 1, 1, 1, 130 1, 1, 1, 1, 1, 1, 1, 1, 131 1, 1, 1, 1, 1, 1, 1, 1, 132 1, 1, 1, 0, 3, 0, 3, 0 133 }; 134 135 static int 136 strcmp_type(const void *e1, const void *e2) 137 { 138 return (strcmp(e1, *(const char * const *)e2)); 139 } 140 141 int 142 lexi(void) 143 { 144 int unary_delim; /* this is set to 1 if the current token 145 * forces a following operator to be unary */ 146 static int last_code; /* the last token type returned */ 147 static int l_struct; /* set to 1 if the last token was 'struct' */ 148 int code; /* internal code to be returned */ 149 char qchar; /* the delimiter character for a string */ 150 151 e_token = s_token; /* point to start of place to save token */ 152 unary_delim = false; 153 ps.col_1 = ps.last_nl; /* tell world that this token started in 154 * column 1 iff the last thing scanned was nl */ 155 ps.last_nl = false; 156 157 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 158 ps.col_1 = false; /* leading blanks imply token is not in column 159 * 1 */ 160 if (++buf_ptr >= buf_end) 161 fill_buffer(); 162 } 163 164 /* Scan an alphanumeric token */ 165 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 166 /* 167 * we have a character or number 168 */ 169 struct templ *p; 170 171 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 172 int seendot = 0, 173 seenexp = 0, 174 seensfx = 0; 175 if (*buf_ptr == '0' && 176 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 177 *e_token++ = *buf_ptr++; 178 *e_token++ = *buf_ptr++; 179 while (isxdigit(*buf_ptr)) { 180 CHECK_SIZE_TOKEN; 181 *e_token++ = *buf_ptr++; 182 } 183 } 184 else 185 while (1) { 186 if (*buf_ptr == '.') { 187 if (seendot) 188 break; 189 else 190 seendot++; 191 } 192 CHECK_SIZE_TOKEN; 193 *e_token++ = *buf_ptr++; 194 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 195 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 196 break; 197 else { 198 seenexp++; 199 seendot++; 200 CHECK_SIZE_TOKEN; 201 *e_token++ = *buf_ptr++; 202 if (*buf_ptr == '+' || *buf_ptr == '-') 203 *e_token++ = *buf_ptr++; 204 } 205 } 206 } 207 while (1) { 208 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 209 CHECK_SIZE_TOKEN; 210 *e_token++ = *buf_ptr++; 211 seensfx |= 1; 212 continue; 213 } 214 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 215 CHECK_SIZE_TOKEN; 216 if (buf_ptr[1] == buf_ptr[0]) 217 *e_token++ = *buf_ptr++; 218 *e_token++ = *buf_ptr++; 219 seensfx |= 2; 220 continue; 221 } 222 break; 223 } 224 } 225 else 226 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 227 /* fill_buffer() terminates buffer with newline */ 228 if (*buf_ptr == BACKSLASH) { 229 if (*(buf_ptr + 1) == '\n') { 230 buf_ptr += 2; 231 if (buf_ptr >= buf_end) 232 fill_buffer(); 233 } else 234 break; 235 } 236 CHECK_SIZE_TOKEN; 237 /* copy it over */ 238 *e_token++ = *buf_ptr++; 239 if (buf_ptr >= buf_end) 240 fill_buffer(); 241 } 242 *e_token++ = '\0'; 243 244 if (s_token[0] == 'L' && s_token[1] == '\0' && 245 (*buf_ptr == '"' || *buf_ptr == '\'')) 246 return (strpfx); 247 248 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 249 if (++buf_ptr >= buf_end) 250 fill_buffer(); 251 } 252 ps.keyword = 0; 253 if (l_struct && !ps.p_l_follow) { 254 /* if last token was 'struct' and we're not 255 * in parentheses, then this token 256 * should be treated as a declaration */ 257 l_struct = false; 258 last_code = ident; 259 ps.last_u_d = true; 260 return (decl); 261 } 262 ps.last_u_d = l_struct; /* Operator after identifier is binary 263 * unless last token was 'struct' */ 264 l_struct = false; 265 last_code = ident; /* Remember that this is the code we will 266 * return */ 267 268 p = bsearch(s_token, 269 specials, 270 sizeof(specials) / sizeof(specials[0]), 271 sizeof(specials[0]), 272 strcmp_type); 273 if (p == NULL) { /* not a special keyword... */ 274 char *u; 275 276 /* ... so maybe a type_t or a typedef */ 277 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 278 strcmp(u, "_t") == 0) || (typename_top >= 0 && 279 bsearch(s_token, typenames, typename_top + 1, 280 sizeof(typenames[0]), strcmp_type))) { 281 ps.keyword = 4; /* a type name */ 282 ps.last_u_d = true; 283 goto found_typename; 284 } 285 } else { /* we have a keyword */ 286 ps.keyword = p->rwcode; 287 ps.last_u_d = true; 288 switch (p->rwcode) { 289 case 7: /* it is a switch */ 290 return (swstmt); 291 case 8: /* a case or default */ 292 return (casestmt); 293 294 case 3: /* a "struct" */ 295 /* 296 * Next time around, we will want to know that we have had a 297 * 'struct' 298 */ 299 l_struct = true; 300 /* FALLTHROUGH */ 301 302 case 4: /* one of the declaration keywords */ 303 found_typename: 304 if (ps.p_l_follow) { 305 /* inside parens: cast, param list, offsetof or sizeof */ 306 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; 307 break; 308 } 309 last_code = decl; 310 return (decl); 311 312 case 5: /* if, while, for */ 313 return (sp_paren); 314 315 case 6: /* do, else */ 316 return (sp_nparen); 317 318 case 10: /* storage class specifier */ 319 return (storage); 320 321 default: /* all others are treated like any other 322 * identifier */ 323 return (ident); 324 } /* end of switch */ 325 } /* end of if (found_it) */ 326 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 327 char *tp = buf_ptr; 328 while (tp < buf_end) 329 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 330 goto not_proc; 331 strncpy(ps.procname, token, sizeof ps.procname - 1); 332 if (ps.in_decl) 333 ps.in_parameter_declaration = 1; 334 rparen_count = 1; 335 not_proc:; 336 } 337 /* 338 * The following hack attempts to guess whether or not the current 339 * token is in fact a declaration keyword -- one that has been 340 * typedefd 341 */ 342 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 343 && !ps.p_l_follow 344 && !ps.block_init 345 && (ps.last_token == rparen || ps.last_token == semicolon || 346 ps.last_token == decl || 347 ps.last_token == lbrace || ps.last_token == rbrace)) { 348 ps.keyword = 4; /* a type name */ 349 ps.last_u_d = true; 350 last_code = decl; 351 return decl; 352 } 353 if (last_code == decl) /* if this is a declared variable, then 354 * following sign is unary */ 355 ps.last_u_d = true; /* will make "int a -1" work */ 356 last_code = ident; 357 return (ident); /* the ident is not in the list */ 358 } /* end of procesing for alpanum character */ 359 360 /* Scan a non-alphanumeric token */ 361 362 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 363 * moved here */ 364 *e_token = '\0'; 365 if (++buf_ptr >= buf_end) 366 fill_buffer(); 367 368 switch (*token) { 369 case '\n': 370 unary_delim = ps.last_u_d; 371 ps.last_nl = true; /* remember that we just had a newline */ 372 code = (had_eof ? 0 : newline); 373 374 /* 375 * if data has been exhausted, the newline is a dummy, and we should 376 * return code to stop 377 */ 378 break; 379 380 case '\'': /* start of quoted character */ 381 case '"': /* start of string */ 382 qchar = *token; 383 if (troff) { 384 e_token[-1] = '`'; 385 if (qchar == '"') 386 *e_token++ = '`'; 387 e_token = chfont(&bodyf, &stringf, e_token); 388 } 389 do { /* copy the string */ 390 while (1) { /* move one character or [/<char>]<char> */ 391 if (*buf_ptr == '\n') { 392 diag2(1, "Unterminated literal"); 393 goto stop_lit; 394 } 395 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 396 * since CHECK_SIZE guarantees that there 397 * are at least 5 entries left */ 398 *e_token = *buf_ptr++; 399 if (buf_ptr >= buf_end) 400 fill_buffer(); 401 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 402 if (*buf_ptr == '\n') /* check for escaped newline */ 403 ++line_no; 404 if (troff) { 405 *++e_token = BACKSLASH; 406 if (*buf_ptr == BACKSLASH) 407 *++e_token = BACKSLASH; 408 } 409 *++e_token = *buf_ptr++; 410 ++e_token; /* we must increment this again because we 411 * copied two chars */ 412 if (buf_ptr >= buf_end) 413 fill_buffer(); 414 } 415 else 416 break; /* we copied one character */ 417 } /* end of while (1) */ 418 } while (*e_token++ != qchar); 419 if (troff) { 420 e_token = chfont(&stringf, &bodyf, e_token - 1); 421 if (qchar == '"') 422 *e_token++ = '\''; 423 } 424 stop_lit: 425 code = ident; 426 break; 427 428 case ('('): 429 case ('['): 430 unary_delim = true; 431 code = lparen; 432 break; 433 434 case (')'): 435 case (']'): 436 code = rparen; 437 break; 438 439 case '#': 440 unary_delim = ps.last_u_d; 441 code = preesc; 442 break; 443 444 case '?': 445 unary_delim = true; 446 code = question; 447 break; 448 449 case (':'): 450 code = colon; 451 unary_delim = true; 452 break; 453 454 case (';'): 455 unary_delim = true; 456 code = semicolon; 457 break; 458 459 case ('{'): 460 unary_delim = true; 461 462 /* 463 * if (ps.in_or_st) ps.block_init = 1; 464 */ 465 /* ? code = ps.block_init ? lparen : lbrace; */ 466 code = lbrace; 467 break; 468 469 case ('}'): 470 unary_delim = true; 471 /* ? code = ps.block_init ? rparen : rbrace; */ 472 code = rbrace; 473 break; 474 475 case 014: /* a form feed */ 476 unary_delim = ps.last_u_d; 477 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 478 * right */ 479 code = form_feed; 480 break; 481 482 case (','): 483 unary_delim = true; 484 code = comma; 485 break; 486 487 case '.': 488 unary_delim = false; 489 code = period; 490 break; 491 492 case '-': 493 case '+': /* check for -, +, --, ++ */ 494 code = (ps.last_u_d ? unary_op : binary_op); 495 unary_delim = true; 496 497 if (*buf_ptr == token[0]) { 498 /* check for doubled character */ 499 *e_token++ = *buf_ptr++; 500 /* buffer overflow will be checked at end of loop */ 501 if (last_code == ident || last_code == rparen) { 502 code = (ps.last_u_d ? unary_op : postop); 503 /* check for following ++ or -- */ 504 unary_delim = false; 505 } 506 } 507 else if (*buf_ptr == '=') 508 /* check for operator += */ 509 *e_token++ = *buf_ptr++; 510 else if (*buf_ptr == '>') { 511 /* check for operator -> */ 512 *e_token++ = *buf_ptr++; 513 if (!pointer_as_binop) { 514 unary_delim = false; 515 code = unary_op; 516 ps.want_blank = false; 517 } 518 } 519 break; /* buffer overflow will be checked at end of 520 * switch */ 521 522 case '=': 523 if (ps.in_or_st) 524 ps.block_init = 1; 525 #ifdef undef 526 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 527 e_token[-1] = *buf_ptr++; 528 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 529 *e_token++ = *buf_ptr++; 530 *e_token++ = '='; /* Flip =+ to += */ 531 *e_token = 0; 532 } 533 #else 534 if (*buf_ptr == '=') {/* == */ 535 *e_token++ = '='; /* Flip =+ to += */ 536 buf_ptr++; 537 *e_token = 0; 538 } 539 #endif 540 code = binary_op; 541 unary_delim = true; 542 break; 543 /* can drop thru!!! */ 544 545 case '>': 546 case '<': 547 case '!': /* ops like <, <<, <=, !=, etc */ 548 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 549 *e_token++ = *buf_ptr; 550 if (++buf_ptr >= buf_end) 551 fill_buffer(); 552 } 553 if (*buf_ptr == '=') 554 *e_token++ = *buf_ptr++; 555 code = (ps.last_u_d ? unary_op : binary_op); 556 unary_delim = true; 557 break; 558 559 default: 560 if (token[0] == '/' && *buf_ptr == '*') { 561 /* it is start of comment */ 562 *e_token++ = '*'; 563 564 if (++buf_ptr >= buf_end) 565 fill_buffer(); 566 567 code = comment; 568 unary_delim = ps.last_u_d; 569 break; 570 } 571 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 572 /* 573 * handle ||, &&, etc, and also things as in int *****i 574 */ 575 *e_token++ = *buf_ptr; 576 if (++buf_ptr >= buf_end) 577 fill_buffer(); 578 } 579 code = (ps.last_u_d ? unary_op : binary_op); 580 unary_delim = true; 581 582 583 } /* end of switch */ 584 if (code != newline) { 585 l_struct = false; 586 last_code = code; 587 } 588 if (buf_ptr >= buf_end) /* check for input buffer empty */ 589 fill_buffer(); 590 ps.last_u_d = unary_delim; 591 *e_token = '\0'; /* null terminate the token */ 592 return (code); 593 } 594 595 void 596 alloc_typenames(void) 597 { 598 599 typenames = (const char **)malloc(sizeof(typenames[0]) * 600 (typename_count = 16)); 601 if (typenames == NULL) 602 err(1, NULL); 603 } 604 605 void 606 add_typename(const char *key) 607 { 608 int comparison; 609 const char *copy; 610 611 if (typename_top + 1 >= typename_count) { 612 typenames = realloc((void *)typenames, 613 sizeof(typenames[0]) * (typename_count *= 2)); 614 if (typenames == NULL) 615 err(1, NULL); 616 } 617 if (typename_top == -1) 618 typenames[++typename_top] = copy = strdup(key); 619 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 620 /* take advantage of sorted input */ 621 if (comparison == 0) /* remove duplicates */ 622 return; 623 typenames[++typename_top] = copy = strdup(key); 624 } 625 else { 626 int p; 627 628 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 629 /* find place for the new key */; 630 if (comparison == 0) /* remove duplicates */ 631 return; 632 memmove(&typenames[p + 1], &typenames[p], 633 sizeof(typenames[0]) * (++typename_top - p)); 634 typenames[p] = copy = strdup(key); 635 } 636 637 if (copy == NULL) 638 err(1, NULL); 639 } 640