1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 41 #endif /* not lint */ 42 #endif 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 /* 47 * Here we have the token scanner for indent. It scans off one token and puts 48 * it in the global variable "token". It returns a code, indicating the type 49 * of token scanned. 50 */ 51 52 #include <err.h> 53 #include <stdio.h> 54 #include <ctype.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include "indent_globs.h" 58 #include "indent_codes.h" 59 #include "indent.h" 60 61 #define alphanum 1 62 #ifdef undef 63 #define opchar 3 64 #endif 65 66 struct templ { 67 const char *rwd; 68 int rwcode; 69 }; 70 71 /* 72 * This table has to be sorted alphabetically, because it'll be used in binary 73 * search. For the same reason, string must be the first thing in struct templ. 74 */ 75 struct templ specials[] = 76 { 77 {"auto", 10}, 78 {"break", 9}, 79 {"case", 8}, 80 {"char", 4}, 81 {"const", 4}, 82 {"default", 8}, 83 {"do", 6}, 84 {"double", 4}, 85 {"else", 6}, 86 {"enum", 3}, 87 {"extern", 10}, 88 {"float", 4}, 89 {"for", 5}, 90 {"global", 4}, 91 {"goto", 9}, 92 {"if", 5}, 93 {"int", 4}, 94 {"long", 4}, 95 {"offsetof", 1}, 96 {"register", 10}, 97 {"return", 9}, 98 {"short", 4}, 99 {"sizeof", 2}, 100 {"static", 10}, 101 {"struct", 3}, 102 {"switch", 7}, 103 {"typedef", 11}, 104 {"union", 3}, 105 {"unsigned", 4}, 106 {"void", 4}, 107 {"volatile", 4}, 108 {"while", 5} 109 }; 110 111 const char **typenames; 112 int typename_count; 113 int typename_top = -1; 114 115 char chartype[128] = 116 { /* this is used to facilitate the decision of 117 * what type (alphanumeric, operator) each 118 * character is */ 119 0, 0, 0, 0, 0, 0, 0, 0, 120 0, 0, 0, 0, 0, 0, 0, 0, 121 0, 0, 0, 0, 0, 0, 0, 0, 122 0, 0, 0, 0, 0, 0, 0, 0, 123 0, 3, 0, 0, 1, 3, 3, 0, 124 0, 0, 3, 3, 0, 3, 0, 3, 125 1, 1, 1, 1, 1, 1, 1, 1, 126 1, 1, 0, 0, 3, 3, 3, 3, 127 0, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 1, 1, 1, 1, 1, 129 1, 1, 1, 1, 1, 1, 1, 1, 130 1, 1, 1, 0, 0, 0, 3, 1, 131 0, 1, 1, 1, 1, 1, 1, 1, 132 1, 1, 1, 1, 1, 1, 1, 1, 133 1, 1, 1, 1, 1, 1, 1, 1, 134 1, 1, 1, 0, 3, 0, 3, 0 135 }; 136 137 static int 138 strcmp_type(const void *e1, const void *e2) 139 { 140 return (strcmp(e1, *(const char * const *)e2)); 141 } 142 143 int 144 lexi(struct parser_state *state) 145 { 146 int unary_delim; /* this is set to 1 if the current token 147 * forces a following operator to be unary */ 148 int code; /* internal code to be returned */ 149 char qchar; /* the delimiter character for a string */ 150 151 e_token = s_token; /* point to start of place to save token */ 152 unary_delim = false; 153 state->col_1 = state->last_nl; /* tell world that this token started 154 * in column 1 iff the last thing 155 * scanned was a newline */ 156 state->last_nl = false; 157 158 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 159 state->col_1 = false; /* leading blanks imply token is not in column 160 * 1 */ 161 if (++buf_ptr >= buf_end) 162 fill_buffer(); 163 } 164 165 /* Scan an alphanumeric token */ 166 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 167 /* 168 * we have a character or number 169 */ 170 struct templ *p; 171 172 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 173 enum base { 174 BASE_2, BASE_8, BASE_10, BASE_16 175 }; 176 int seendot = 0, 177 seenexp = 0, 178 seensfx = 0; 179 enum base in_base = BASE_10; 180 181 if (*buf_ptr == '0') { 182 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') 183 in_base = BASE_2; 184 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') 185 in_base = BASE_16; 186 else if (isdigit(buf_ptr[1])) 187 in_base = BASE_8; 188 } 189 switch (in_base) { 190 case BASE_2: 191 *e_token++ = *buf_ptr++; 192 *e_token++ = *buf_ptr++; 193 while (*buf_ptr == '0' || *buf_ptr == '1') { 194 CHECK_SIZE_TOKEN; 195 *e_token++ = *buf_ptr++; 196 } 197 break; 198 case BASE_8: 199 *e_token++ = *buf_ptr++; 200 while (*buf_ptr >= '0' && *buf_ptr <= '8') { 201 CHECK_SIZE_TOKEN; 202 *e_token++ = *buf_ptr++; 203 } 204 break; 205 case BASE_16: 206 *e_token++ = *buf_ptr++; 207 *e_token++ = *buf_ptr++; 208 while (isxdigit(*buf_ptr)) { 209 CHECK_SIZE_TOKEN; 210 *e_token++ = *buf_ptr++; 211 } 212 break; 213 case BASE_10: 214 while (1) { 215 if (*buf_ptr == '.') { 216 if (seendot) 217 break; 218 else 219 seendot++; 220 } 221 CHECK_SIZE_TOKEN; 222 *e_token++ = *buf_ptr++; 223 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 224 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 225 break; 226 else { 227 seenexp++; 228 seendot++; 229 CHECK_SIZE_TOKEN; 230 *e_token++ = *buf_ptr++; 231 if (*buf_ptr == '+' || *buf_ptr == '-') 232 *e_token++ = *buf_ptr++; 233 } 234 } 235 } 236 break; 237 } 238 while (1) { 239 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 240 CHECK_SIZE_TOKEN; 241 *e_token++ = *buf_ptr++; 242 seensfx |= 1; 243 continue; 244 } 245 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 246 CHECK_SIZE_TOKEN; 247 if (buf_ptr[1] == buf_ptr[0]) 248 *e_token++ = *buf_ptr++; 249 *e_token++ = *buf_ptr++; 250 seensfx |= 2; 251 continue; 252 } 253 break; 254 } 255 } 256 else 257 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 258 /* fill_buffer() terminates buffer with newline */ 259 if (*buf_ptr == BACKSLASH) { 260 if (*(buf_ptr + 1) == '\n') { 261 buf_ptr += 2; 262 if (buf_ptr >= buf_end) 263 fill_buffer(); 264 } else 265 break; 266 } 267 CHECK_SIZE_TOKEN; 268 /* copy it over */ 269 *e_token++ = *buf_ptr++; 270 if (buf_ptr >= buf_end) 271 fill_buffer(); 272 } 273 *e_token++ = '\0'; 274 275 if (s_token[0] == 'L' && s_token[1] == '\0' && 276 (*buf_ptr == '"' || *buf_ptr == '\'')) 277 return (strpfx); 278 279 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 280 if (++buf_ptr >= buf_end) 281 fill_buffer(); 282 } 283 state->keyword = 0; 284 if (state->last_token == structure && !state->p_l_follow) { 285 /* if last token was 'struct' and we're not 286 * in parentheses, then this token 287 * should be treated as a declaration */ 288 state->last_u_d = true; 289 return (decl); 290 } 291 /* 292 * Operator after identifier is binary unless last token was 'struct' 293 */ 294 state->last_u_d = (state->last_token == structure); 295 296 p = bsearch(s_token, 297 specials, 298 sizeof(specials) / sizeof(specials[0]), 299 sizeof(specials[0]), 300 strcmp_type); 301 if (p == NULL) { /* not a special keyword... */ 302 char *u; 303 304 /* ... so maybe a type_t or a typedef */ 305 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 306 strcmp(u, "_t") == 0) || (typename_top >= 0 && 307 bsearch(s_token, typenames, typename_top + 1, 308 sizeof(typenames[0]), strcmp_type))) { 309 state->keyword = 4; /* a type name */ 310 state->last_u_d = true; 311 goto found_typename; 312 } 313 } else { /* we have a keyword */ 314 state->keyword = p->rwcode; 315 state->last_u_d = true; 316 switch (p->rwcode) { 317 case 7: /* it is a switch */ 318 return (swstmt); 319 case 8: /* a case or default */ 320 return (casestmt); 321 322 case 3: /* a "struct" */ 323 /* FALLTHROUGH */ 324 case 4: /* one of the declaration keywords */ 325 found_typename: 326 if (state->p_l_follow) { 327 /* inside parens: cast, param list, offsetof or sizeof */ 328 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; 329 } 330 if (p != NULL && p->rwcode == 3) 331 return (structure); 332 if (state->p_l_follow) 333 break; 334 return (decl); 335 336 case 5: /* if, while, for */ 337 return (sp_paren); 338 339 case 6: /* do, else */ 340 return (sp_nparen); 341 342 case 10: /* storage class specifier */ 343 return (storage); 344 345 case 11: /* typedef */ 346 return (type_def); 347 348 default: /* all others are treated like any other 349 * identifier */ 350 return (ident); 351 } /* end of switch */ 352 } /* end of if (found_it) */ 353 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && 354 state->in_parameter_declaration == 0 && state->block_init == 0) { 355 char *tp = buf_ptr; 356 while (tp < buf_end) 357 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 358 goto not_proc; 359 strncpy(state->procname, token, sizeof state->procname - 1); 360 if (state->in_decl) 361 state->in_parameter_declaration = 1; 362 return (funcname); 363 not_proc:; 364 } 365 /* 366 * The following hack attempts to guess whether or not the current 367 * token is in fact a declaration keyword -- one that has been 368 * typedefd 369 */ 370 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 371 && !state->p_l_follow 372 && !state->block_init 373 && (state->last_token == rparen || state->last_token == semicolon || 374 state->last_token == decl || 375 state->last_token == lbrace || state->last_token == rbrace)) { 376 state->keyword = 4; /* a type name */ 377 state->last_u_d = true; 378 return decl; 379 } 380 if (state->last_token == decl) /* if this is a declared variable, 381 * then following sign is unary */ 382 state->last_u_d = true; /* will make "int a -1" work */ 383 return (ident); /* the ident is not in the list */ 384 } /* end of procesing for alpanum character */ 385 386 /* Scan a non-alphanumeric token */ 387 388 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 389 * moved here */ 390 *e_token = '\0'; 391 if (++buf_ptr >= buf_end) 392 fill_buffer(); 393 394 switch (*token) { 395 case '\n': 396 unary_delim = state->last_u_d; 397 state->last_nl = true; /* remember that we just had a newline */ 398 code = (had_eof ? 0 : newline); 399 400 /* 401 * if data has been exhausted, the newline is a dummy, and we should 402 * return code to stop 403 */ 404 break; 405 406 case '\'': /* start of quoted character */ 407 case '"': /* start of string */ 408 qchar = *token; 409 if (troff) { 410 e_token[-1] = '`'; 411 if (qchar == '"') 412 *e_token++ = '`'; 413 e_token = chfont(&bodyf, &stringf, e_token); 414 } 415 do { /* copy the string */ 416 while (1) { /* move one character or [/<char>]<char> */ 417 if (*buf_ptr == '\n') { 418 diag2(1, "Unterminated literal"); 419 goto stop_lit; 420 } 421 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 422 * since CHECK_SIZE guarantees that there 423 * are at least 5 entries left */ 424 *e_token = *buf_ptr++; 425 if (buf_ptr >= buf_end) 426 fill_buffer(); 427 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 428 if (*buf_ptr == '\n') /* check for escaped newline */ 429 ++line_no; 430 if (troff) { 431 *++e_token = BACKSLASH; 432 if (*buf_ptr == BACKSLASH) 433 *++e_token = BACKSLASH; 434 } 435 *++e_token = *buf_ptr++; 436 ++e_token; /* we must increment this again because we 437 * copied two chars */ 438 if (buf_ptr >= buf_end) 439 fill_buffer(); 440 } 441 else 442 break; /* we copied one character */ 443 } /* end of while (1) */ 444 } while (*e_token++ != qchar); 445 if (troff) { 446 e_token = chfont(&stringf, &bodyf, e_token - 1); 447 if (qchar == '"') 448 *e_token++ = '\''; 449 } 450 stop_lit: 451 code = ident; 452 break; 453 454 case ('('): 455 case ('['): 456 unary_delim = true; 457 code = lparen; 458 break; 459 460 case (')'): 461 case (']'): 462 code = rparen; 463 break; 464 465 case '#': 466 unary_delim = state->last_u_d; 467 code = preesc; 468 break; 469 470 case '?': 471 unary_delim = true; 472 code = question; 473 break; 474 475 case (':'): 476 code = colon; 477 unary_delim = true; 478 break; 479 480 case (';'): 481 unary_delim = true; 482 code = semicolon; 483 break; 484 485 case ('{'): 486 unary_delim = true; 487 488 /* 489 * if (state->in_or_st) state->block_init = 1; 490 */ 491 /* ? code = state->block_init ? lparen : lbrace; */ 492 code = lbrace; 493 break; 494 495 case ('}'): 496 unary_delim = true; 497 /* ? code = state->block_init ? rparen : rbrace; */ 498 code = rbrace; 499 break; 500 501 case 014: /* a form feed */ 502 unary_delim = state->last_u_d; 503 state->last_nl = true; /* remember this so we can set 'state->col_1' 504 * right */ 505 code = form_feed; 506 break; 507 508 case (','): 509 unary_delim = true; 510 code = comma; 511 break; 512 513 case '.': 514 unary_delim = false; 515 code = period; 516 break; 517 518 case '-': 519 case '+': /* check for -, +, --, ++ */ 520 code = (state->last_u_d ? unary_op : binary_op); 521 unary_delim = true; 522 523 if (*buf_ptr == token[0]) { 524 /* check for doubled character */ 525 *e_token++ = *buf_ptr++; 526 /* buffer overflow will be checked at end of loop */ 527 if (state->last_token == ident || state->last_token == rparen) { 528 code = (state->last_u_d ? unary_op : postop); 529 /* check for following ++ or -- */ 530 unary_delim = false; 531 } 532 } 533 else if (*buf_ptr == '=') 534 /* check for operator += */ 535 *e_token++ = *buf_ptr++; 536 else if (*buf_ptr == '>') { 537 /* check for operator -> */ 538 *e_token++ = *buf_ptr++; 539 unary_delim = false; 540 code = unary_op; 541 state->want_blank = false; 542 } 543 break; /* buffer overflow will be checked at end of 544 * switch */ 545 546 case '=': 547 if (state->in_or_st) 548 state->block_init = 1; 549 #ifdef undef 550 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 551 e_token[-1] = *buf_ptr++; 552 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 553 *e_token++ = *buf_ptr++; 554 *e_token++ = '='; /* Flip =+ to += */ 555 *e_token = 0; 556 } 557 #else 558 if (*buf_ptr == '=') {/* == */ 559 *e_token++ = '='; /* Flip =+ to += */ 560 buf_ptr++; 561 *e_token = 0; 562 } 563 #endif 564 code = binary_op; 565 unary_delim = true; 566 break; 567 /* can drop thru!!! */ 568 569 case '>': 570 case '<': 571 case '!': /* ops like <, <<, <=, !=, etc */ 572 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 573 *e_token++ = *buf_ptr; 574 if (++buf_ptr >= buf_end) 575 fill_buffer(); 576 } 577 if (*buf_ptr == '=') 578 *e_token++ = *buf_ptr++; 579 code = (state->last_u_d ? unary_op : binary_op); 580 unary_delim = true; 581 break; 582 583 default: 584 if (token[0] == '/' && *buf_ptr == '*') { 585 /* it is start of comment */ 586 *e_token++ = '*'; 587 588 if (++buf_ptr >= buf_end) 589 fill_buffer(); 590 591 code = comment; 592 unary_delim = state->last_u_d; 593 break; 594 } 595 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 596 /* 597 * handle ||, &&, etc, and also things as in int *****i 598 */ 599 *e_token++ = *buf_ptr; 600 if (++buf_ptr >= buf_end) 601 fill_buffer(); 602 } 603 code = (state->last_u_d ? unary_op : binary_op); 604 unary_delim = true; 605 606 607 } /* end of switch */ 608 if (buf_ptr >= buf_end) /* check for input buffer empty */ 609 fill_buffer(); 610 state->last_u_d = unary_delim; 611 *e_token = '\0'; /* null terminate the token */ 612 return (code); 613 } 614 615 void 616 alloc_typenames(void) 617 { 618 619 typenames = (const char **)malloc(sizeof(typenames[0]) * 620 (typename_count = 16)); 621 if (typenames == NULL) 622 err(1, NULL); 623 } 624 625 void 626 add_typename(const char *key) 627 { 628 int comparison; 629 const char *copy; 630 631 if (typename_top + 1 >= typename_count) { 632 typenames = realloc((void *)typenames, 633 sizeof(typenames[0]) * (typename_count *= 2)); 634 if (typenames == NULL) 635 err(1, NULL); 636 } 637 if (typename_top == -1) 638 typenames[++typename_top] = copy = strdup(key); 639 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 640 /* take advantage of sorted input */ 641 if (comparison == 0) /* remove duplicates */ 642 return; 643 typenames[++typename_top] = copy = strdup(key); 644 } 645 else { 646 int p; 647 648 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 649 /* find place for the new key */; 650 if (comparison == 0) /* remove duplicates */ 651 return; 652 memmove(&typenames[p + 1], &typenames[p], 653 sizeof(typenames[0]) * (++typename_top - p)); 654 typenames[p] = copy = strdup(key); 655 } 656 657 if (copy == NULL) 658 err(1, NULL); 659 } 660