1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 41 #endif /* not lint */ 42 #endif 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 /* 47 * Here we have the token scanner for indent. It scans off one token and puts 48 * it in the global variable "token". It returns a code, indicating the type 49 * of token scanned. 50 */ 51 52 #include <err.h> 53 #include <stdio.h> 54 #include <ctype.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include "indent_globs.h" 58 #include "indent_codes.h" 59 #include "indent.h" 60 61 #define alphanum 1 62 #ifdef undef 63 #define opchar 3 64 #endif 65 66 struct templ { 67 const char *rwd; 68 int rwcode; 69 }; 70 71 /* 72 * This table has to be sorted alphabetically, because it'll be used in binary 73 * search. For the same reason, string must be the first thing in struct templ. 74 */ 75 struct templ specials[] = 76 { 77 {"_Bool", 4}, 78 {"_Complex", 4}, 79 {"_Imaginary", 4}, 80 {"auto", 10}, 81 {"bool", 4}, 82 {"break", 9}, 83 {"case", 8}, 84 {"char", 4}, 85 {"complex", 4}, 86 {"const", 4}, 87 {"continue", 12}, 88 {"default", 8}, 89 {"do", 6}, 90 {"double", 4}, 91 {"else", 6}, 92 {"enum", 3}, 93 {"extern", 10}, 94 {"float", 4}, 95 {"for", 5}, 96 {"global", 4}, 97 {"goto", 9}, 98 {"if", 5}, 99 {"imaginary", 4}, 100 {"inline", 12}, 101 {"int", 4}, 102 {"long", 4}, 103 {"offsetof", 1}, 104 {"register", 10}, 105 {"restrict", 12}, 106 {"return", 9}, 107 {"short", 4}, 108 {"signed", 4}, 109 {"sizeof", 2}, 110 {"static", 10}, 111 {"struct", 3}, 112 {"switch", 7}, 113 {"typedef", 11}, 114 {"union", 3}, 115 {"unsigned", 4}, 116 {"void", 4}, 117 {"volatile", 4}, 118 {"while", 5} 119 }; 120 121 const char **typenames; 122 int typename_count; 123 int typename_top = -1; 124 125 char chartype[128] = 126 { /* this is used to facilitate the decision of 127 * what type (alphanumeric, operator) each 128 * character is */ 129 0, 0, 0, 0, 0, 0, 0, 0, 130 0, 0, 0, 0, 0, 0, 0, 0, 131 0, 0, 0, 0, 0, 0, 0, 0, 132 0, 0, 0, 0, 0, 0, 0, 0, 133 0, 3, 0, 0, 1, 3, 3, 0, 134 0, 0, 3, 3, 0, 3, 0, 3, 135 1, 1, 1, 1, 1, 1, 1, 1, 136 1, 1, 0, 0, 3, 3, 3, 3, 137 0, 1, 1, 1, 1, 1, 1, 1, 138 1, 1, 1, 1, 1, 1, 1, 1, 139 1, 1, 1, 1, 1, 1, 1, 1, 140 1, 1, 1, 0, 0, 0, 3, 1, 141 0, 1, 1, 1, 1, 1, 1, 1, 142 1, 1, 1, 1, 1, 1, 1, 1, 143 1, 1, 1, 1, 1, 1, 1, 1, 144 1, 1, 1, 0, 3, 0, 3, 0 145 }; 146 147 static int 148 strcmp_type(const void *e1, const void *e2) 149 { 150 return (strcmp(e1, *(const char * const *)e2)); 151 } 152 153 int 154 lexi(struct parser_state *state) 155 { 156 int unary_delim; /* this is set to 1 if the current token 157 * forces a following operator to be unary */ 158 int code; /* internal code to be returned */ 159 char qchar; /* the delimiter character for a string */ 160 161 e_token = s_token; /* point to start of place to save token */ 162 unary_delim = false; 163 state->col_1 = state->last_nl; /* tell world that this token started 164 * in column 1 iff the last thing 165 * scanned was a newline */ 166 state->last_nl = false; 167 168 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 169 state->col_1 = false; /* leading blanks imply token is not in column 170 * 1 */ 171 if (++buf_ptr >= buf_end) 172 fill_buffer(); 173 } 174 175 /* Scan an alphanumeric token */ 176 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 177 /* 178 * we have a character or number 179 */ 180 struct templ *p; 181 182 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 183 enum base { 184 BASE_2, BASE_8, BASE_10, BASE_16 185 }; 186 int seendot = 0, 187 seenexp = 0, 188 seensfx = 0; 189 enum base in_base = BASE_10; 190 191 if (*buf_ptr == '0') { 192 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') 193 in_base = BASE_2; 194 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') 195 in_base = BASE_16; 196 else if (isdigit(buf_ptr[1])) 197 in_base = BASE_8; 198 } 199 switch (in_base) { 200 case BASE_2: 201 *e_token++ = *buf_ptr++; 202 *e_token++ = *buf_ptr++; 203 while (*buf_ptr == '0' || *buf_ptr == '1') { 204 CHECK_SIZE_TOKEN; 205 *e_token++ = *buf_ptr++; 206 } 207 break; 208 case BASE_8: 209 *e_token++ = *buf_ptr++; 210 while (*buf_ptr >= '0' && *buf_ptr <= '8') { 211 CHECK_SIZE_TOKEN; 212 *e_token++ = *buf_ptr++; 213 } 214 break; 215 case BASE_16: 216 *e_token++ = *buf_ptr++; 217 *e_token++ = *buf_ptr++; 218 while (isxdigit(*buf_ptr)) { 219 CHECK_SIZE_TOKEN; 220 *e_token++ = *buf_ptr++; 221 } 222 break; 223 case BASE_10: 224 while (1) { 225 if (*buf_ptr == '.') { 226 if (seendot) 227 break; 228 else 229 seendot++; 230 } 231 CHECK_SIZE_TOKEN; 232 *e_token++ = *buf_ptr++; 233 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 234 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 235 break; 236 else { 237 seenexp++; 238 seendot++; 239 CHECK_SIZE_TOKEN; 240 *e_token++ = *buf_ptr++; 241 if (*buf_ptr == '+' || *buf_ptr == '-') 242 *e_token++ = *buf_ptr++; 243 } 244 } 245 } 246 break; 247 } 248 while (1) { 249 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 250 CHECK_SIZE_TOKEN; 251 *e_token++ = *buf_ptr++; 252 seensfx |= 1; 253 continue; 254 } 255 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 256 CHECK_SIZE_TOKEN; 257 if (buf_ptr[1] == buf_ptr[0]) 258 *e_token++ = *buf_ptr++; 259 *e_token++ = *buf_ptr++; 260 seensfx |= 2; 261 continue; 262 } 263 break; 264 } 265 } 266 else 267 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 268 /* fill_buffer() terminates buffer with newline */ 269 if (*buf_ptr == BACKSLASH) { 270 if (*(buf_ptr + 1) == '\n') { 271 buf_ptr += 2; 272 if (buf_ptr >= buf_end) 273 fill_buffer(); 274 } else 275 break; 276 } 277 CHECK_SIZE_TOKEN; 278 /* copy it over */ 279 *e_token++ = *buf_ptr++; 280 if (buf_ptr >= buf_end) 281 fill_buffer(); 282 } 283 *e_token++ = '\0'; 284 285 if (s_token[0] == 'L' && s_token[1] == '\0' && 286 (*buf_ptr == '"' || *buf_ptr == '\'')) 287 return (strpfx); 288 289 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 290 if (++buf_ptr >= buf_end) 291 fill_buffer(); 292 } 293 state->keyword = 0; 294 if (state->last_token == structure && !state->p_l_follow) { 295 /* if last token was 'struct' and we're not 296 * in parentheses, then this token 297 * should be treated as a declaration */ 298 state->last_u_d = true; 299 return (decl); 300 } 301 /* 302 * Operator after identifier is binary unless last token was 'struct' 303 */ 304 state->last_u_d = (state->last_token == structure); 305 306 p = bsearch(s_token, 307 specials, 308 sizeof(specials) / sizeof(specials[0]), 309 sizeof(specials[0]), 310 strcmp_type); 311 if (p == NULL) { /* not a special keyword... */ 312 char *u; 313 314 /* ... so maybe a type_t or a typedef */ 315 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 316 strcmp(u, "_t") == 0) || (typename_top >= 0 && 317 bsearch(s_token, typenames, typename_top + 1, 318 sizeof(typenames[0]), strcmp_type))) { 319 state->keyword = 4; /* a type name */ 320 state->last_u_d = true; 321 goto found_typename; 322 } 323 } else { /* we have a keyword */ 324 state->keyword = p->rwcode; 325 state->last_u_d = true; 326 switch (p->rwcode) { 327 case 7: /* it is a switch */ 328 return (swstmt); 329 case 8: /* a case or default */ 330 return (casestmt); 331 332 case 3: /* a "struct" */ 333 /* FALLTHROUGH */ 334 case 4: /* one of the declaration keywords */ 335 found_typename: 336 if (state->p_l_follow) { 337 /* inside parens: cast, param list, offsetof or sizeof */ 338 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; 339 } 340 if (p != NULL && p->rwcode == 3) 341 return (structure); 342 if (state->p_l_follow) 343 break; 344 return (decl); 345 346 case 5: /* if, while, for */ 347 return (sp_paren); 348 349 case 6: /* do, else */ 350 return (sp_nparen); 351 352 case 10: /* storage class specifier */ 353 return (storage); 354 355 case 11: /* typedef */ 356 return (type_def); 357 358 default: /* all others are treated like any other 359 * identifier */ 360 return (ident); 361 } /* end of switch */ 362 } /* end of if (found_it) */ 363 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && 364 state->in_parameter_declaration == 0 && state->block_init == 0) { 365 char *tp = buf_ptr; 366 while (tp < buf_end) 367 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 368 goto not_proc; 369 strncpy(state->procname, token, sizeof state->procname - 1); 370 if (state->in_decl) 371 state->in_parameter_declaration = 1; 372 return (funcname); 373 not_proc:; 374 } 375 /* 376 * The following hack attempts to guess whether or not the current 377 * token is in fact a declaration keyword -- one that has been 378 * typedefd 379 */ 380 else if (!state->p_l_follow && !state->block_init && 381 !state->in_stmt && 382 ((*buf_ptr == '*' && buf_ptr[1] != '=') || 383 isalpha((unsigned char)*buf_ptr)) && 384 (state->last_token == semicolon || state->last_token == lbrace || 385 state->last_token == rbrace)) { 386 state->keyword = 4; /* a type name */ 387 state->last_u_d = true; 388 return decl; 389 } 390 if (state->last_token == decl) /* if this is a declared variable, 391 * then following sign is unary */ 392 state->last_u_d = true; /* will make "int a -1" work */ 393 return (ident); /* the ident is not in the list */ 394 } /* end of procesing for alpanum character */ 395 396 /* Scan a non-alphanumeric token */ 397 398 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 399 * moved here */ 400 *e_token = '\0'; 401 if (++buf_ptr >= buf_end) 402 fill_buffer(); 403 404 switch (*token) { 405 case '\n': 406 unary_delim = state->last_u_d; 407 state->last_nl = true; /* remember that we just had a newline */ 408 code = (had_eof ? 0 : newline); 409 410 /* 411 * if data has been exhausted, the newline is a dummy, and we should 412 * return code to stop 413 */ 414 break; 415 416 case '\'': /* start of quoted character */ 417 case '"': /* start of string */ 418 qchar = *token; 419 if (troff) { 420 e_token[-1] = '`'; 421 if (qchar == '"') 422 *e_token++ = '`'; 423 e_token = chfont(&bodyf, &stringf, e_token); 424 } 425 do { /* copy the string */ 426 while (1) { /* move one character or [/<char>]<char> */ 427 if (*buf_ptr == '\n') { 428 diag2(1, "Unterminated literal"); 429 goto stop_lit; 430 } 431 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 432 * since CHECK_SIZE guarantees that there 433 * are at least 5 entries left */ 434 *e_token = *buf_ptr++; 435 if (buf_ptr >= buf_end) 436 fill_buffer(); 437 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 438 if (*buf_ptr == '\n') /* check for escaped newline */ 439 ++line_no; 440 if (troff) { 441 *++e_token = BACKSLASH; 442 if (*buf_ptr == BACKSLASH) 443 *++e_token = BACKSLASH; 444 } 445 *++e_token = *buf_ptr++; 446 ++e_token; /* we must increment this again because we 447 * copied two chars */ 448 if (buf_ptr >= buf_end) 449 fill_buffer(); 450 } 451 else 452 break; /* we copied one character */ 453 } /* end of while (1) */ 454 } while (*e_token++ != qchar); 455 if (troff) { 456 e_token = chfont(&stringf, &bodyf, e_token - 1); 457 if (qchar == '"') 458 *e_token++ = '\''; 459 } 460 stop_lit: 461 code = ident; 462 break; 463 464 case ('('): 465 case ('['): 466 unary_delim = true; 467 code = lparen; 468 break; 469 470 case (')'): 471 case (']'): 472 code = rparen; 473 break; 474 475 case '#': 476 unary_delim = state->last_u_d; 477 code = preesc; 478 break; 479 480 case '?': 481 unary_delim = true; 482 code = question; 483 break; 484 485 case (':'): 486 code = colon; 487 unary_delim = true; 488 break; 489 490 case (';'): 491 unary_delim = true; 492 code = semicolon; 493 break; 494 495 case ('{'): 496 unary_delim = true; 497 498 /* 499 * if (state->in_or_st) state->block_init = 1; 500 */ 501 /* ? code = state->block_init ? lparen : lbrace; */ 502 code = lbrace; 503 break; 504 505 case ('}'): 506 unary_delim = true; 507 /* ? code = state->block_init ? rparen : rbrace; */ 508 code = rbrace; 509 break; 510 511 case 014: /* a form feed */ 512 unary_delim = state->last_u_d; 513 state->last_nl = true; /* remember this so we can set 'state->col_1' 514 * right */ 515 code = form_feed; 516 break; 517 518 case (','): 519 unary_delim = true; 520 code = comma; 521 break; 522 523 case '.': 524 unary_delim = false; 525 code = period; 526 break; 527 528 case '-': 529 case '+': /* check for -, +, --, ++ */ 530 code = (state->last_u_d ? unary_op : binary_op); 531 unary_delim = true; 532 533 if (*buf_ptr == token[0]) { 534 /* check for doubled character */ 535 *e_token++ = *buf_ptr++; 536 /* buffer overflow will be checked at end of loop */ 537 if (state->last_token == ident || state->last_token == rparen) { 538 code = (state->last_u_d ? unary_op : postop); 539 /* check for following ++ or -- */ 540 unary_delim = false; 541 } 542 } 543 else if (*buf_ptr == '=') 544 /* check for operator += */ 545 *e_token++ = *buf_ptr++; 546 else if (*buf_ptr == '>') { 547 /* check for operator -> */ 548 *e_token++ = *buf_ptr++; 549 unary_delim = false; 550 code = unary_op; 551 state->want_blank = false; 552 } 553 break; /* buffer overflow will be checked at end of 554 * switch */ 555 556 case '=': 557 if (state->in_or_st) 558 state->block_init = 1; 559 #ifdef undef 560 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 561 e_token[-1] = *buf_ptr++; 562 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 563 *e_token++ = *buf_ptr++; 564 *e_token++ = '='; /* Flip =+ to += */ 565 *e_token = 0; 566 } 567 #else 568 if (*buf_ptr == '=') {/* == */ 569 *e_token++ = '='; /* Flip =+ to += */ 570 buf_ptr++; 571 *e_token = 0; 572 } 573 #endif 574 code = binary_op; 575 unary_delim = true; 576 break; 577 /* can drop thru!!! */ 578 579 case '>': 580 case '<': 581 case '!': /* ops like <, <<, <=, !=, etc */ 582 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 583 *e_token++ = *buf_ptr; 584 if (++buf_ptr >= buf_end) 585 fill_buffer(); 586 } 587 if (*buf_ptr == '=') 588 *e_token++ = *buf_ptr++; 589 code = (state->last_u_d ? unary_op : binary_op); 590 unary_delim = true; 591 break; 592 593 case '*': 594 unary_delim = true; 595 if (!state->last_u_d) { 596 if (*buf_ptr == '=') 597 *e_token++ = *buf_ptr++; 598 code = binary_op; 599 break; 600 } 601 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { 602 if (*buf_ptr == '*') 603 *e_token++ = *buf_ptr; 604 if (++buf_ptr >= buf_end) 605 fill_buffer(); 606 } 607 if (ps.in_decl) { 608 char *tp = buf_ptr; 609 610 while (isalpha((unsigned char)*tp) || 611 isspace((unsigned char)*tp)) { 612 if (++tp >= buf_end) 613 fill_buffer(); 614 } 615 if (*tp == '(') 616 ps.procname[0] = ' '; 617 } 618 code = unary_op; 619 break; 620 621 default: 622 if (token[0] == '/' && *buf_ptr == '*') { 623 /* it is start of comment */ 624 *e_token++ = '*'; 625 626 if (++buf_ptr >= buf_end) 627 fill_buffer(); 628 629 code = comment; 630 unary_delim = state->last_u_d; 631 break; 632 } 633 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 634 /* 635 * handle ||, &&, etc, and also things as in int *****i 636 */ 637 *e_token++ = *buf_ptr; 638 if (++buf_ptr >= buf_end) 639 fill_buffer(); 640 } 641 code = (state->last_u_d ? unary_op : binary_op); 642 unary_delim = true; 643 644 645 } /* end of switch */ 646 if (buf_ptr >= buf_end) /* check for input buffer empty */ 647 fill_buffer(); 648 state->last_u_d = unary_delim; 649 *e_token = '\0'; /* null terminate the token */ 650 return (code); 651 } 652 653 void 654 alloc_typenames(void) 655 { 656 657 typenames = (const char **)malloc(sizeof(typenames[0]) * 658 (typename_count = 16)); 659 if (typenames == NULL) 660 err(1, NULL); 661 } 662 663 void 664 add_typename(const char *key) 665 { 666 int comparison; 667 const char *copy; 668 669 if (typename_top + 1 >= typename_count) { 670 typenames = realloc((void *)typenames, 671 sizeof(typenames[0]) * (typename_count *= 2)); 672 if (typenames == NULL) 673 err(1, NULL); 674 } 675 if (typename_top == -1) 676 typenames[++typename_top] = copy = strdup(key); 677 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 678 /* take advantage of sorted input */ 679 if (comparison == 0) /* remove duplicates */ 680 return; 681 typenames[++typename_top] = copy = strdup(key); 682 } 683 else { 684 int p; 685 686 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 687 /* find place for the new key */; 688 if (comparison == 0) /* remove duplicates */ 689 return; 690 memmove(&typenames[p + 1], &typenames[p], 691 sizeof(typenames[0]) * (++typename_top - p)); 692 typenames[p] = copy = strdup(key); 693 } 694 695 if (copy == NULL) 696 err(1, NULL); 697 } 698