1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 41 #endif /* not lint */ 42 #endif 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 /* 47 * Here we have the token scanner for indent. It scans off one token and puts 48 * it in the global variable "token". It returns a code, indicating the type 49 * of token scanned. 50 */ 51 52 #include <err.h> 53 #include <stdio.h> 54 #include <ctype.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include "indent_globs.h" 58 #include "indent_codes.h" 59 #include "indent.h" 60 61 #define alphanum 1 62 #ifdef undef 63 #define opchar 3 64 #endif 65 66 struct templ { 67 const char *rwd; 68 int rwcode; 69 }; 70 71 /* 72 * This table has to be sorted alphabetically, because it'll be used in binary 73 * search. For the same reason, string must be the first thing in struct templ. 74 */ 75 struct templ specials[] = 76 { 77 {"_Bool", 4}, 78 {"_Complex", 4}, 79 {"_Imaginary", 4}, 80 {"auto", 10}, 81 {"bool", 4}, 82 {"break", 9}, 83 {"case", 8}, 84 {"char", 4}, 85 {"complex", 4}, 86 {"const", 4}, 87 {"continue", 12}, 88 {"default", 8}, 89 {"do", 6}, 90 {"double", 4}, 91 {"else", 6}, 92 {"enum", 3}, 93 {"extern", 10}, 94 {"float", 4}, 95 {"for", 5}, 96 {"global", 4}, 97 {"goto", 9}, 98 {"if", 5}, 99 {"imaginary", 4}, 100 {"inline", 12}, 101 {"int", 4}, 102 {"long", 4}, 103 {"offsetof", 1}, 104 {"register", 10}, 105 {"restrict", 12}, 106 {"return", 9}, 107 {"short", 4}, 108 {"signed", 4}, 109 {"sizeof", 2}, 110 {"static", 10}, 111 {"struct", 3}, 112 {"switch", 7}, 113 {"typedef", 11}, 114 {"union", 3}, 115 {"unsigned", 4}, 116 {"void", 4}, 117 {"volatile", 4}, 118 {"while", 5} 119 }; 120 121 const char **typenames; 122 int typename_count; 123 int typename_top = -1; 124 125 char chartype[128] = 126 { /* this is used to facilitate the decision of 127 * what type (alphanumeric, operator) each 128 * character is */ 129 0, 0, 0, 0, 0, 0, 0, 0, 130 0, 0, 0, 0, 0, 0, 0, 0, 131 0, 0, 0, 0, 0, 0, 0, 0, 132 0, 0, 0, 0, 0, 0, 0, 0, 133 0, 3, 0, 0, 1, 3, 3, 0, 134 0, 0, 3, 3, 0, 3, 0, 3, 135 1, 1, 1, 1, 1, 1, 1, 1, 136 1, 1, 0, 0, 3, 3, 3, 3, 137 0, 1, 1, 1, 1, 1, 1, 1, 138 1, 1, 1, 1, 1, 1, 1, 1, 139 1, 1, 1, 1, 1, 1, 1, 1, 140 1, 1, 1, 0, 0, 0, 3, 1, 141 0, 1, 1, 1, 1, 1, 1, 1, 142 1, 1, 1, 1, 1, 1, 1, 1, 143 1, 1, 1, 1, 1, 1, 1, 1, 144 1, 1, 1, 0, 3, 0, 3, 0 145 }; 146 147 static int 148 strcmp_type(const void *e1, const void *e2) 149 { 150 return (strcmp(e1, *(const char * const *)e2)); 151 } 152 153 int 154 lexi(struct parser_state *state) 155 { 156 int unary_delim; /* this is set to 1 if the current token 157 * forces a following operator to be unary */ 158 int code; /* internal code to be returned */ 159 char qchar; /* the delimiter character for a string */ 160 161 e_token = s_token; /* point to start of place to save token */ 162 unary_delim = false; 163 state->col_1 = state->last_nl; /* tell world that this token started 164 * in column 1 iff the last thing 165 * scanned was a newline */ 166 state->last_nl = false; 167 168 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 169 state->col_1 = false; /* leading blanks imply token is not in column 170 * 1 */ 171 if (++buf_ptr >= buf_end) 172 fill_buffer(); 173 } 174 175 /* Scan an alphanumeric token */ 176 if (chartype[*buf_ptr & 127] == alphanum || 177 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 178 /* 179 * we have a character or number 180 */ 181 struct templ *p; 182 183 if (isdigit((unsigned char)*buf_ptr) || 184 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 185 enum base { 186 BASE_2, BASE_8, BASE_10, BASE_16 187 }; 188 int seendot = 0, 189 seenexp = 0, 190 seensfx = 0; 191 enum base in_base = BASE_10; 192 193 if (*buf_ptr == '0') { 194 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') 195 in_base = BASE_2; 196 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') 197 in_base = BASE_16; 198 else if (isdigit((unsigned char)buf_ptr[1])) 199 in_base = BASE_8; 200 } 201 switch (in_base) { 202 case BASE_2: 203 *e_token++ = *buf_ptr++; 204 *e_token++ = *buf_ptr++; 205 while (*buf_ptr == '0' || *buf_ptr == '1') { 206 CHECK_SIZE_TOKEN; 207 *e_token++ = *buf_ptr++; 208 } 209 break; 210 case BASE_8: 211 *e_token++ = *buf_ptr++; 212 while (*buf_ptr >= '0' && *buf_ptr <= '8') { 213 CHECK_SIZE_TOKEN; 214 *e_token++ = *buf_ptr++; 215 } 216 break; 217 case BASE_16: 218 *e_token++ = *buf_ptr++; 219 *e_token++ = *buf_ptr++; 220 while (isxdigit((unsigned char)*buf_ptr)) { 221 CHECK_SIZE_TOKEN; 222 *e_token++ = *buf_ptr++; 223 } 224 break; 225 case BASE_10: 226 while (1) { 227 if (*buf_ptr == '.') { 228 if (seendot) 229 break; 230 else 231 seendot++; 232 } 233 CHECK_SIZE_TOKEN; 234 *e_token++ = *buf_ptr++; 235 if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') { 236 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 237 break; 238 else { 239 seenexp++; 240 seendot++; 241 CHECK_SIZE_TOKEN; 242 *e_token++ = *buf_ptr++; 243 if (*buf_ptr == '+' || *buf_ptr == '-') 244 *e_token++ = *buf_ptr++; 245 } 246 } 247 } 248 break; 249 } 250 while (1) { 251 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 252 CHECK_SIZE_TOKEN; 253 *e_token++ = *buf_ptr++; 254 seensfx |= 1; 255 continue; 256 } 257 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 258 CHECK_SIZE_TOKEN; 259 if (buf_ptr[1] == buf_ptr[0]) 260 *e_token++ = *buf_ptr++; 261 *e_token++ = *buf_ptr++; 262 seensfx |= 2; 263 continue; 264 } 265 break; 266 } 267 } 268 else 269 while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) { 270 /* fill_buffer() terminates buffer with newline */ 271 if (*buf_ptr == BACKSLASH) { 272 if (*(buf_ptr + 1) == '\n') { 273 buf_ptr += 2; 274 if (buf_ptr >= buf_end) 275 fill_buffer(); 276 } else 277 break; 278 } 279 CHECK_SIZE_TOKEN; 280 /* copy it over */ 281 *e_token++ = *buf_ptr++; 282 if (buf_ptr >= buf_end) 283 fill_buffer(); 284 } 285 *e_token++ = '\0'; 286 287 if (s_token[0] == 'L' && s_token[1] == '\0' && 288 (*buf_ptr == '"' || *buf_ptr == '\'')) 289 return (strpfx); 290 291 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 292 if (++buf_ptr >= buf_end) 293 fill_buffer(); 294 } 295 state->keyword = 0; 296 if (state->last_token == structure && !state->p_l_follow) { 297 /* if last token was 'struct' and we're not 298 * in parentheses, then this token 299 * should be treated as a declaration */ 300 state->last_u_d = true; 301 return (decl); 302 } 303 /* 304 * Operator after identifier is binary unless last token was 'struct' 305 */ 306 state->last_u_d = (state->last_token == structure); 307 308 p = bsearch(s_token, 309 specials, 310 sizeof(specials) / sizeof(specials[0]), 311 sizeof(specials[0]), 312 strcmp_type); 313 if (p == NULL) { /* not a special keyword... */ 314 char *u; 315 316 /* ... so maybe a type_t or a typedef */ 317 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 318 strcmp(u, "_t") == 0) || (typename_top >= 0 && 319 bsearch(s_token, typenames, typename_top + 1, 320 sizeof(typenames[0]), strcmp_type))) { 321 state->keyword = 4; /* a type name */ 322 state->last_u_d = true; 323 goto found_typename; 324 } 325 } else { /* we have a keyword */ 326 state->keyword = p->rwcode; 327 state->last_u_d = true; 328 switch (p->rwcode) { 329 case 7: /* it is a switch */ 330 return (swstmt); 331 case 8: /* a case or default */ 332 return (casestmt); 333 334 case 3: /* a "struct" */ 335 /* FALLTHROUGH */ 336 case 4: /* one of the declaration keywords */ 337 found_typename: 338 if (state->p_l_follow) { 339 /* inside parens: cast, param list, offsetof or sizeof */ 340 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; 341 } 342 if (p != NULL && p->rwcode == 3) 343 return (structure); 344 if (state->p_l_follow) 345 break; 346 return (decl); 347 348 case 5: /* if, while, for */ 349 return (sp_paren); 350 351 case 6: /* do, else */ 352 return (sp_nparen); 353 354 case 10: /* storage class specifier */ 355 return (storage); 356 357 case 11: /* typedef */ 358 return (type_def); 359 360 default: /* all others are treated like any other 361 * identifier */ 362 return (ident); 363 } /* end of switch */ 364 } /* end of if (found_it) */ 365 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && 366 state->in_parameter_declaration == 0 && state->block_init == 0) { 367 char *tp = buf_ptr; 368 while (tp < buf_end) 369 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 370 goto not_proc; 371 strncpy(state->procname, token, sizeof state->procname - 1); 372 if (state->in_decl) 373 state->in_parameter_declaration = 1; 374 return (funcname); 375 not_proc:; 376 } 377 /* 378 * The following hack attempts to guess whether or not the current 379 * token is in fact a declaration keyword -- one that has been 380 * typedefd 381 */ 382 else if (!state->p_l_follow && !state->block_init && 383 !state->in_stmt && 384 ((*buf_ptr == '*' && buf_ptr[1] != '=') || 385 isalpha((unsigned char)*buf_ptr)) && 386 (state->last_token == semicolon || state->last_token == lbrace || 387 state->last_token == rbrace)) { 388 state->keyword = 4; /* a type name */ 389 state->last_u_d = true; 390 return decl; 391 } 392 if (state->last_token == decl) /* if this is a declared variable, 393 * then following sign is unary */ 394 state->last_u_d = true; /* will make "int a -1" work */ 395 return (ident); /* the ident is not in the list */ 396 } /* end of procesing for alpanum character */ 397 398 /* Scan a non-alphanumeric token */ 399 400 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 401 * moved here */ 402 *e_token = '\0'; 403 if (++buf_ptr >= buf_end) 404 fill_buffer(); 405 406 switch (*token) { 407 case '\n': 408 unary_delim = state->last_u_d; 409 state->last_nl = true; /* remember that we just had a newline */ 410 code = (had_eof ? 0 : newline); 411 412 /* 413 * if data has been exhausted, the newline is a dummy, and we should 414 * return code to stop 415 */ 416 break; 417 418 case '\'': /* start of quoted character */ 419 case '"': /* start of string */ 420 qchar = *token; 421 if (troff) { 422 e_token[-1] = '`'; 423 if (qchar == '"') 424 *e_token++ = '`'; 425 e_token = chfont(&bodyf, &stringf, e_token); 426 } 427 do { /* copy the string */ 428 while (1) { /* move one character or [/<char>]<char> */ 429 if (*buf_ptr == '\n') { 430 diag2(1, "Unterminated literal"); 431 goto stop_lit; 432 } 433 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 434 * since CHECK_SIZE guarantees that there 435 * are at least 5 entries left */ 436 *e_token = *buf_ptr++; 437 if (buf_ptr >= buf_end) 438 fill_buffer(); 439 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 440 if (*buf_ptr == '\n') /* check for escaped newline */ 441 ++line_no; 442 if (troff) { 443 *++e_token = BACKSLASH; 444 if (*buf_ptr == BACKSLASH) 445 *++e_token = BACKSLASH; 446 } 447 *++e_token = *buf_ptr++; 448 ++e_token; /* we must increment this again because we 449 * copied two chars */ 450 if (buf_ptr >= buf_end) 451 fill_buffer(); 452 } 453 else 454 break; /* we copied one character */ 455 } /* end of while (1) */ 456 } while (*e_token++ != qchar); 457 if (troff) { 458 e_token = chfont(&stringf, &bodyf, e_token - 1); 459 if (qchar == '"') 460 *e_token++ = '\''; 461 } 462 stop_lit: 463 code = ident; 464 break; 465 466 case ('('): 467 case ('['): 468 unary_delim = true; 469 code = lparen; 470 break; 471 472 case (')'): 473 case (']'): 474 code = rparen; 475 break; 476 477 case '#': 478 unary_delim = state->last_u_d; 479 code = preesc; 480 break; 481 482 case '?': 483 unary_delim = true; 484 code = question; 485 break; 486 487 case (':'): 488 code = colon; 489 unary_delim = true; 490 break; 491 492 case (';'): 493 unary_delim = true; 494 code = semicolon; 495 break; 496 497 case ('{'): 498 unary_delim = true; 499 500 /* 501 * if (state->in_or_st) state->block_init = 1; 502 */ 503 /* ? code = state->block_init ? lparen : lbrace; */ 504 code = lbrace; 505 break; 506 507 case ('}'): 508 unary_delim = true; 509 /* ? code = state->block_init ? rparen : rbrace; */ 510 code = rbrace; 511 break; 512 513 case 014: /* a form feed */ 514 unary_delim = state->last_u_d; 515 state->last_nl = true; /* remember this so we can set 'state->col_1' 516 * right */ 517 code = form_feed; 518 break; 519 520 case (','): 521 unary_delim = true; 522 code = comma; 523 break; 524 525 case '.': 526 unary_delim = false; 527 code = period; 528 break; 529 530 case '-': 531 case '+': /* check for -, +, --, ++ */ 532 code = (state->last_u_d ? unary_op : binary_op); 533 unary_delim = true; 534 535 if (*buf_ptr == token[0]) { 536 /* check for doubled character */ 537 *e_token++ = *buf_ptr++; 538 /* buffer overflow will be checked at end of loop */ 539 if (state->last_token == ident || state->last_token == rparen) { 540 code = (state->last_u_d ? unary_op : postop); 541 /* check for following ++ or -- */ 542 unary_delim = false; 543 } 544 } 545 else if (*buf_ptr == '=') 546 /* check for operator += */ 547 *e_token++ = *buf_ptr++; 548 else if (*buf_ptr == '>') { 549 /* check for operator -> */ 550 *e_token++ = *buf_ptr++; 551 unary_delim = false; 552 code = unary_op; 553 state->want_blank = false; 554 } 555 break; /* buffer overflow will be checked at end of 556 * switch */ 557 558 case '=': 559 if (state->in_or_st) 560 state->block_init = 1; 561 #ifdef undef 562 if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */ 563 e_token[-1] = *buf_ptr++; 564 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 565 *e_token++ = *buf_ptr++; 566 *e_token++ = '='; /* Flip =+ to += */ 567 *e_token = 0; 568 } 569 #else 570 if (*buf_ptr == '=') {/* == */ 571 *e_token++ = '='; /* Flip =+ to += */ 572 buf_ptr++; 573 *e_token = 0; 574 } 575 #endif 576 code = binary_op; 577 unary_delim = true; 578 break; 579 /* can drop thru!!! */ 580 581 case '>': 582 case '<': 583 case '!': /* ops like <, <<, <=, !=, etc */ 584 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 585 *e_token++ = *buf_ptr; 586 if (++buf_ptr >= buf_end) 587 fill_buffer(); 588 } 589 if (*buf_ptr == '=') 590 *e_token++ = *buf_ptr++; 591 code = (state->last_u_d ? unary_op : binary_op); 592 unary_delim = true; 593 break; 594 595 case '*': 596 unary_delim = true; 597 if (!state->last_u_d) { 598 if (*buf_ptr == '=') 599 *e_token++ = *buf_ptr++; 600 code = binary_op; 601 break; 602 } 603 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { 604 if (*buf_ptr == '*') 605 *e_token++ = *buf_ptr; 606 if (++buf_ptr >= buf_end) 607 fill_buffer(); 608 } 609 if (ps.in_decl) { 610 char *tp = buf_ptr; 611 612 while (isalpha((unsigned char)*tp) || 613 isspace((unsigned char)*tp)) { 614 if (++tp >= buf_end) 615 fill_buffer(); 616 } 617 if (*tp == '(') 618 ps.procname[0] = ' '; 619 } 620 code = unary_op; 621 break; 622 623 default: 624 if (token[0] == '/' && *buf_ptr == '*') { 625 /* it is start of comment */ 626 *e_token++ = '*'; 627 628 if (++buf_ptr >= buf_end) 629 fill_buffer(); 630 631 code = comment; 632 unary_delim = state->last_u_d; 633 break; 634 } 635 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 636 /* 637 * handle ||, &&, etc, and also things as in int *****i 638 */ 639 *e_token++ = *buf_ptr; 640 if (++buf_ptr >= buf_end) 641 fill_buffer(); 642 } 643 code = (state->last_u_d ? unary_op : binary_op); 644 unary_delim = true; 645 646 647 } /* end of switch */ 648 if (buf_ptr >= buf_end) /* check for input buffer empty */ 649 fill_buffer(); 650 state->last_u_d = unary_delim; 651 *e_token = '\0'; /* null terminate the token */ 652 return (code); 653 } 654 655 void 656 alloc_typenames(void) 657 { 658 659 typenames = (const char **)malloc(sizeof(typenames[0]) * 660 (typename_count = 16)); 661 if (typenames == NULL) 662 err(1, NULL); 663 } 664 665 void 666 add_typename(const char *key) 667 { 668 int comparison; 669 const char *copy; 670 671 if (typename_top + 1 >= typename_count) { 672 typenames = realloc((void *)typenames, 673 sizeof(typenames[0]) * (typename_count *= 2)); 674 if (typenames == NULL) 675 err(1, NULL); 676 } 677 if (typename_top == -1) 678 typenames[++typename_top] = copy = strdup(key); 679 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 680 /* take advantage of sorted input */ 681 if (comparison == 0) /* remove duplicates */ 682 return; 683 typenames[++typename_top] = copy = strdup(key); 684 } 685 else { 686 int p; 687 688 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 689 /* find place for the new key */; 690 if (comparison == 0) /* remove duplicates */ 691 return; 692 memmove(&typenames[p + 1], &typenames[p], 693 sizeof(typenames[0]) * (++typename_top - p)); 694 typenames[p] = copy = strdup(key); 695 } 696 697 if (copy == NULL) 698 err(1, NULL); 699 } 700