1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 41 #endif /* not lint */ 42 #endif 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 /* 47 * Here we have the token scanner for indent. It scans off one token and puts 48 * it in the global variable "token". It returns a code, indicating the type 49 * of token scanned. 50 */ 51 52 #include <err.h> 53 #include <stdio.h> 54 #include <ctype.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include "indent_globs.h" 58 #include "indent_codes.h" 59 #include "indent.h" 60 61 #define alphanum 1 62 #ifdef undef 63 #define opchar 3 64 #endif 65 66 struct templ { 67 const char *rwd; 68 int rwcode; 69 }; 70 71 /* 72 * This table has to be sorted alphabetically, because it'll be used in binary 73 * search. For the same reason, string must be the first thing in struct templ. 74 */ 75 struct templ specials[] = 76 { 77 {"auto", 10}, 78 {"break", 9}, 79 {"case", 8}, 80 {"char", 4}, 81 {"const", 4}, 82 {"default", 8}, 83 {"do", 6}, 84 {"double", 4}, 85 {"else", 6}, 86 {"enum", 3}, 87 {"extern", 10}, 88 {"float", 4}, 89 {"for", 5}, 90 {"global", 4}, 91 {"goto", 9}, 92 {"if", 5}, 93 {"int", 4}, 94 {"long", 4}, 95 {"offsetof", 1}, 96 {"register", 10}, 97 {"return", 9}, 98 {"short", 4}, 99 {"sizeof", 2}, 100 {"static", 10}, 101 {"struct", 3}, 102 {"switch", 7}, 103 {"typedef", 10}, 104 {"union", 3}, 105 {"unsigned", 4}, 106 {"void", 4}, 107 {"volatile", 4}, 108 {"while", 5} 109 }; 110 111 const char **typenames; 112 int typename_count; 113 int typename_top = -1; 114 115 char chartype[128] = 116 { /* this is used to facilitate the decision of 117 * what type (alphanumeric, operator) each 118 * character is */ 119 0, 0, 0, 0, 0, 0, 0, 0, 120 0, 0, 0, 0, 0, 0, 0, 0, 121 0, 0, 0, 0, 0, 0, 0, 0, 122 0, 0, 0, 0, 0, 0, 0, 0, 123 0, 3, 0, 0, 1, 3, 3, 0, 124 0, 0, 3, 3, 0, 3, 0, 3, 125 1, 1, 1, 1, 1, 1, 1, 1, 126 1, 1, 0, 0, 3, 3, 3, 3, 127 0, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 1, 1, 1, 1, 1, 129 1, 1, 1, 1, 1, 1, 1, 1, 130 1, 1, 1, 0, 0, 0, 3, 1, 131 0, 1, 1, 1, 1, 1, 1, 1, 132 1, 1, 1, 1, 1, 1, 1, 1, 133 1, 1, 1, 1, 1, 1, 1, 1, 134 1, 1, 1, 0, 3, 0, 3, 0 135 }; 136 137 static int 138 strcmp_type(const void *e1, const void *e2) 139 { 140 return (strcmp(e1, *(const char * const *)e2)); 141 } 142 143 int 144 lexi(void) 145 { 146 int unary_delim; /* this is set to 1 if the current token 147 * forces a following operator to be unary */ 148 static int last_code; /* the last token type returned */ 149 static int l_struct; /* set to 1 if the last token was 'struct' */ 150 int code; /* internal code to be returned */ 151 char qchar; /* the delimiter character for a string */ 152 153 e_token = s_token; /* point to start of place to save token */ 154 unary_delim = false; 155 ps.col_1 = ps.last_nl; /* tell world that this token started in 156 * column 1 iff the last thing scanned was nl */ 157 ps.last_nl = false; 158 159 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 160 ps.col_1 = false; /* leading blanks imply token is not in column 161 * 1 */ 162 if (++buf_ptr >= buf_end) 163 fill_buffer(); 164 } 165 166 /* Scan an alphanumeric token */ 167 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 168 /* 169 * we have a character or number 170 */ 171 struct templ *p; 172 173 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 174 enum base { 175 BASE_2, BASE_8, BASE_10, BASE_16 176 }; 177 int seendot = 0, 178 seenexp = 0, 179 seensfx = 0; 180 enum base in_base = BASE_10; 181 182 if (*buf_ptr == '0') { 183 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') 184 in_base = BASE_2; 185 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') 186 in_base = BASE_16; 187 else if (isdigit(buf_ptr[1])) 188 in_base = BASE_8; 189 } 190 switch (in_base) { 191 case BASE_2: 192 *e_token++ = *buf_ptr++; 193 *e_token++ = *buf_ptr++; 194 while (*buf_ptr == '0' || *buf_ptr == '1') { 195 CHECK_SIZE_TOKEN; 196 *e_token++ = *buf_ptr++; 197 } 198 break; 199 case BASE_8: 200 *e_token++ = *buf_ptr++; 201 while (*buf_ptr >= '0' && *buf_ptr <= '8') { 202 CHECK_SIZE_TOKEN; 203 *e_token++ = *buf_ptr++; 204 } 205 break; 206 case BASE_16: 207 *e_token++ = *buf_ptr++; 208 *e_token++ = *buf_ptr++; 209 while (isxdigit(*buf_ptr)) { 210 CHECK_SIZE_TOKEN; 211 *e_token++ = *buf_ptr++; 212 } 213 break; 214 case BASE_10: 215 while (1) { 216 if (*buf_ptr == '.') { 217 if (seendot) 218 break; 219 else 220 seendot++; 221 } 222 CHECK_SIZE_TOKEN; 223 *e_token++ = *buf_ptr++; 224 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 225 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 226 break; 227 else { 228 seenexp++; 229 seendot++; 230 CHECK_SIZE_TOKEN; 231 *e_token++ = *buf_ptr++; 232 if (*buf_ptr == '+' || *buf_ptr == '-') 233 *e_token++ = *buf_ptr++; 234 } 235 } 236 } 237 break; 238 } 239 while (1) { 240 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 241 CHECK_SIZE_TOKEN; 242 *e_token++ = *buf_ptr++; 243 seensfx |= 1; 244 continue; 245 } 246 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 247 CHECK_SIZE_TOKEN; 248 if (buf_ptr[1] == buf_ptr[0]) 249 *e_token++ = *buf_ptr++; 250 *e_token++ = *buf_ptr++; 251 seensfx |= 2; 252 continue; 253 } 254 break; 255 } 256 } 257 else 258 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 259 /* fill_buffer() terminates buffer with newline */ 260 if (*buf_ptr == BACKSLASH) { 261 if (*(buf_ptr + 1) == '\n') { 262 buf_ptr += 2; 263 if (buf_ptr >= buf_end) 264 fill_buffer(); 265 } else 266 break; 267 } 268 CHECK_SIZE_TOKEN; 269 /* copy it over */ 270 *e_token++ = *buf_ptr++; 271 if (buf_ptr >= buf_end) 272 fill_buffer(); 273 } 274 *e_token++ = '\0'; 275 276 if (s_token[0] == 'L' && s_token[1] == '\0' && 277 (*buf_ptr == '"' || *buf_ptr == '\'')) 278 return (strpfx); 279 280 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 281 if (++buf_ptr >= buf_end) 282 fill_buffer(); 283 } 284 ps.keyword = 0; 285 if (l_struct && !ps.p_l_follow) { 286 /* if last token was 'struct' and we're not 287 * in parentheses, then this token 288 * should be treated as a declaration */ 289 l_struct = false; 290 last_code = ident; 291 ps.last_u_d = true; 292 return (decl); 293 } 294 ps.last_u_d = l_struct; /* Operator after identifier is binary 295 * unless last token was 'struct' */ 296 l_struct = false; 297 last_code = ident; /* Remember that this is the code we will 298 * return */ 299 300 p = bsearch(s_token, 301 specials, 302 sizeof(specials) / sizeof(specials[0]), 303 sizeof(specials[0]), 304 strcmp_type); 305 if (p == NULL) { /* not a special keyword... */ 306 char *u; 307 308 /* ... so maybe a type_t or a typedef */ 309 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 310 strcmp(u, "_t") == 0) || (typename_top >= 0 && 311 bsearch(s_token, typenames, typename_top + 1, 312 sizeof(typenames[0]), strcmp_type))) { 313 ps.keyword = 4; /* a type name */ 314 ps.last_u_d = true; 315 goto found_typename; 316 } 317 } else { /* we have a keyword */ 318 ps.keyword = p->rwcode; 319 ps.last_u_d = true; 320 switch (p->rwcode) { 321 case 7: /* it is a switch */ 322 return (swstmt); 323 case 8: /* a case or default */ 324 return (casestmt); 325 326 case 3: /* a "struct" */ 327 /* 328 * Next time around, we will want to know that we have had a 329 * 'struct' 330 */ 331 l_struct = true; 332 /* FALLTHROUGH */ 333 334 case 4: /* one of the declaration keywords */ 335 found_typename: 336 if (ps.p_l_follow) { 337 /* inside parens: cast, param list, offsetof or sizeof */ 338 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; 339 break; 340 } 341 last_code = decl; 342 return (decl); 343 344 case 5: /* if, while, for */ 345 return (sp_paren); 346 347 case 6: /* do, else */ 348 return (sp_nparen); 349 350 case 10: /* storage class specifier */ 351 return (storage); 352 353 default: /* all others are treated like any other 354 * identifier */ 355 return (ident); 356 } /* end of switch */ 357 } /* end of if (found_it) */ 358 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 && 359 ps.in_parameter_declaration == 0 && ps.block_init == 0) { 360 char *tp = buf_ptr; 361 while (tp < buf_end) 362 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 363 goto not_proc; 364 strncpy(ps.procname, token, sizeof ps.procname - 1); 365 if (ps.in_decl) 366 ps.in_parameter_declaration = 1; 367 return (last_code = funcname); 368 not_proc:; 369 } 370 /* 371 * The following hack attempts to guess whether or not the current 372 * token is in fact a declaration keyword -- one that has been 373 * typedefd 374 */ 375 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 376 && !ps.p_l_follow 377 && !ps.block_init 378 && (ps.last_token == rparen || ps.last_token == semicolon || 379 ps.last_token == decl || 380 ps.last_token == lbrace || ps.last_token == rbrace)) { 381 ps.keyword = 4; /* a type name */ 382 ps.last_u_d = true; 383 last_code = decl; 384 return decl; 385 } 386 if (last_code == decl) /* if this is a declared variable, then 387 * following sign is unary */ 388 ps.last_u_d = true; /* will make "int a -1" work */ 389 last_code = ident; 390 return (ident); /* the ident is not in the list */ 391 } /* end of procesing for alpanum character */ 392 393 /* Scan a non-alphanumeric token */ 394 395 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 396 * moved here */ 397 *e_token = '\0'; 398 if (++buf_ptr >= buf_end) 399 fill_buffer(); 400 401 switch (*token) { 402 case '\n': 403 unary_delim = ps.last_u_d; 404 ps.last_nl = true; /* remember that we just had a newline */ 405 code = (had_eof ? 0 : newline); 406 407 /* 408 * if data has been exhausted, the newline is a dummy, and we should 409 * return code to stop 410 */ 411 break; 412 413 case '\'': /* start of quoted character */ 414 case '"': /* start of string */ 415 qchar = *token; 416 if (troff) { 417 e_token[-1] = '`'; 418 if (qchar == '"') 419 *e_token++ = '`'; 420 e_token = chfont(&bodyf, &stringf, e_token); 421 } 422 do { /* copy the string */ 423 while (1) { /* move one character or [/<char>]<char> */ 424 if (*buf_ptr == '\n') { 425 diag2(1, "Unterminated literal"); 426 goto stop_lit; 427 } 428 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 429 * since CHECK_SIZE guarantees that there 430 * are at least 5 entries left */ 431 *e_token = *buf_ptr++; 432 if (buf_ptr >= buf_end) 433 fill_buffer(); 434 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 435 if (*buf_ptr == '\n') /* check for escaped newline */ 436 ++line_no; 437 if (troff) { 438 *++e_token = BACKSLASH; 439 if (*buf_ptr == BACKSLASH) 440 *++e_token = BACKSLASH; 441 } 442 *++e_token = *buf_ptr++; 443 ++e_token; /* we must increment this again because we 444 * copied two chars */ 445 if (buf_ptr >= buf_end) 446 fill_buffer(); 447 } 448 else 449 break; /* we copied one character */ 450 } /* end of while (1) */ 451 } while (*e_token++ != qchar); 452 if (troff) { 453 e_token = chfont(&stringf, &bodyf, e_token - 1); 454 if (qchar == '"') 455 *e_token++ = '\''; 456 } 457 stop_lit: 458 code = ident; 459 break; 460 461 case ('('): 462 case ('['): 463 unary_delim = true; 464 code = lparen; 465 break; 466 467 case (')'): 468 case (']'): 469 code = rparen; 470 break; 471 472 case '#': 473 unary_delim = ps.last_u_d; 474 code = preesc; 475 break; 476 477 case '?': 478 unary_delim = true; 479 code = question; 480 break; 481 482 case (':'): 483 code = colon; 484 unary_delim = true; 485 break; 486 487 case (';'): 488 unary_delim = true; 489 code = semicolon; 490 break; 491 492 case ('{'): 493 unary_delim = true; 494 495 /* 496 * if (ps.in_or_st) ps.block_init = 1; 497 */ 498 /* ? code = ps.block_init ? lparen : lbrace; */ 499 code = lbrace; 500 break; 501 502 case ('}'): 503 unary_delim = true; 504 /* ? code = ps.block_init ? rparen : rbrace; */ 505 code = rbrace; 506 break; 507 508 case 014: /* a form feed */ 509 unary_delim = ps.last_u_d; 510 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 511 * right */ 512 code = form_feed; 513 break; 514 515 case (','): 516 unary_delim = true; 517 code = comma; 518 break; 519 520 case '.': 521 unary_delim = false; 522 code = period; 523 break; 524 525 case '-': 526 case '+': /* check for -, +, --, ++ */ 527 code = (ps.last_u_d ? unary_op : binary_op); 528 unary_delim = true; 529 530 if (*buf_ptr == token[0]) { 531 /* check for doubled character */ 532 *e_token++ = *buf_ptr++; 533 /* buffer overflow will be checked at end of loop */ 534 if (last_code == ident || last_code == rparen) { 535 code = (ps.last_u_d ? unary_op : postop); 536 /* check for following ++ or -- */ 537 unary_delim = false; 538 } 539 } 540 else if (*buf_ptr == '=') 541 /* check for operator += */ 542 *e_token++ = *buf_ptr++; 543 else if (*buf_ptr == '>') { 544 /* check for operator -> */ 545 *e_token++ = *buf_ptr++; 546 if (!pointer_as_binop) { 547 unary_delim = false; 548 code = unary_op; 549 ps.want_blank = false; 550 } 551 } 552 break; /* buffer overflow will be checked at end of 553 * switch */ 554 555 case '=': 556 if (ps.in_or_st) 557 ps.block_init = 1; 558 #ifdef undef 559 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 560 e_token[-1] = *buf_ptr++; 561 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 562 *e_token++ = *buf_ptr++; 563 *e_token++ = '='; /* Flip =+ to += */ 564 *e_token = 0; 565 } 566 #else 567 if (*buf_ptr == '=') {/* == */ 568 *e_token++ = '='; /* Flip =+ to += */ 569 buf_ptr++; 570 *e_token = 0; 571 } 572 #endif 573 code = binary_op; 574 unary_delim = true; 575 break; 576 /* can drop thru!!! */ 577 578 case '>': 579 case '<': 580 case '!': /* ops like <, <<, <=, !=, etc */ 581 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 582 *e_token++ = *buf_ptr; 583 if (++buf_ptr >= buf_end) 584 fill_buffer(); 585 } 586 if (*buf_ptr == '=') 587 *e_token++ = *buf_ptr++; 588 code = (ps.last_u_d ? unary_op : binary_op); 589 unary_delim = true; 590 break; 591 592 default: 593 if (token[0] == '/' && *buf_ptr == '*') { 594 /* it is start of comment */ 595 *e_token++ = '*'; 596 597 if (++buf_ptr >= buf_end) 598 fill_buffer(); 599 600 code = comment; 601 unary_delim = ps.last_u_d; 602 break; 603 } 604 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 605 /* 606 * handle ||, &&, etc, and also things as in int *****i 607 */ 608 *e_token++ = *buf_ptr; 609 if (++buf_ptr >= buf_end) 610 fill_buffer(); 611 } 612 code = (ps.last_u_d ? unary_op : binary_op); 613 unary_delim = true; 614 615 616 } /* end of switch */ 617 if (code != newline) { 618 l_struct = false; 619 last_code = code; 620 } 621 if (buf_ptr >= buf_end) /* check for input buffer empty */ 622 fill_buffer(); 623 ps.last_u_d = unary_delim; 624 *e_token = '\0'; /* null terminate the token */ 625 return (code); 626 } 627 628 void 629 alloc_typenames(void) 630 { 631 632 typenames = (const char **)malloc(sizeof(typenames[0]) * 633 (typename_count = 16)); 634 if (typenames == NULL) 635 err(1, NULL); 636 } 637 638 void 639 add_typename(const char *key) 640 { 641 int comparison; 642 const char *copy; 643 644 if (typename_top + 1 >= typename_count) { 645 typenames = realloc((void *)typenames, 646 sizeof(typenames[0]) * (typename_count *= 2)); 647 if (typenames == NULL) 648 err(1, NULL); 649 } 650 if (typename_top == -1) 651 typenames[++typename_top] = copy = strdup(key); 652 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 653 /* take advantage of sorted input */ 654 if (comparison == 0) /* remove duplicates */ 655 return; 656 typenames[++typename_top] = copy = strdup(key); 657 } 658 else { 659 int p; 660 661 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 662 /* find place for the new key */; 663 if (comparison == 0) /* remove duplicates */ 664 return; 665 memmove(&typenames[p + 1], &typenames[p], 666 sizeof(typenames[0]) * (++typename_top - p)); 667 typenames[p] = copy = strdup(key); 668 } 669 670 if (copy == NULL) 671 err(1, NULL); 672 } 673