1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Here we have the token scanner for indent. It scans off one token and puts 40 * it in the global variable "token". It returns a code, indicating the type 41 * of token scanned. 42 */ 43 44 #include <err.h> 45 #include <stdio.h> 46 #include <ctype.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <sys/param.h> 50 51 #include "indent_globs.h" 52 #include "indent_codes.h" 53 #include "indent.h" 54 55 struct templ { 56 const char *rwd; 57 int rwcode; 58 }; 59 60 /* 61 * This table has to be sorted alphabetically, because it'll be used in binary 62 * search. For the same reason, string must be the first thing in struct templ. 63 */ 64 struct templ specials[] = 65 { 66 {"_Bool", 4}, 67 {"_Complex", 4}, 68 {"_Imaginary", 4}, 69 {"auto", 10}, 70 {"bool", 4}, 71 {"break", 9}, 72 {"case", 8}, 73 {"char", 4}, 74 {"complex", 4}, 75 {"const", 4}, 76 {"continue", 12}, 77 {"default", 8}, 78 {"do", 6}, 79 {"double", 4}, 80 {"else", 6}, 81 {"enum", 3}, 82 {"extern", 10}, 83 {"float", 4}, 84 {"for", 5}, 85 {"global", 4}, 86 {"goto", 9}, 87 {"if", 5}, 88 {"imaginary", 4}, 89 {"inline", 12}, 90 {"int", 4}, 91 {"long", 4}, 92 {"offsetof", 1}, 93 {"register", 10}, 94 {"restrict", 12}, 95 {"return", 9}, 96 {"short", 4}, 97 {"signed", 4}, 98 {"sizeof", 2}, 99 {"static", 10}, 100 {"struct", 3}, 101 {"switch", 7}, 102 {"typedef", 11}, 103 {"union", 3}, 104 {"unsigned", 4}, 105 {"void", 4}, 106 {"volatile", 4}, 107 {"while", 5} 108 }; 109 110 const char **typenames; 111 int typename_count; 112 int typename_top = -1; 113 114 /* 115 * The transition table below was rewritten by hand from lx's output, given 116 * the following definitions. lx is Katherine Flavel's lexer generator. 117 * 118 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; 119 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; 120 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; 121 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; 122 * 123 * D+ E FS? -> $float; 124 * D* "." D+ E? FS? -> $float; 125 * D+ "." E? FS? -> $float; HP H+ IS? -> $int; 126 * HP H+ P FS? -> $float; NZ D* IS? -> $int; 127 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; 128 * HP H+ "." P FS -> $float; BP B+ IS? -> $int; 129 */ 130 static char const *table[] = { 131 /* examples: 132 00 133 s 0xx 134 t 00xaa 135 a 11 101100xxa.. 136 r 11ee0001101lbuuxx.a.pp 137 t.01.e+008bLuxll0Ll.aa.p+0 138 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 139 ['0'] = "CEIDEHHHIJQ U Q VUVVZZZ", 140 ['1'] = "DEIDEHHHIJQ U Q VUVVZZZ", 141 ['7'] = "DEIDEHHHIJ U VUVVZZZ", 142 ['9'] = "DEJDEHHHJJ U VUVVZZZ", 143 ['a'] = " U VUVV ", 144 ['b'] = " K U VUVV ", 145 ['e'] = " FFF FF U VUVV ", 146 ['f'] = " f f U VUVV f", 147 ['u'] = " MM M i iiM M ", 148 ['x'] = " N ", 149 ['p'] = " FFX ", 150 ['L'] = " LLf fL PR Li L f", 151 ['l'] = " OOf fO S P O i O f", 152 ['+'] = " G Y ", 153 ['.'] = "B EE EE T W ", 154 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 155 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", 156 }; 157 158 static int 159 strcmp_type(const void *e1, const void *e2) 160 { 161 return (strcmp(e1, *(const char * const *)e2)); 162 } 163 164 int 165 lexi(struct parser_state *state) 166 { 167 int unary_delim; /* this is set to 1 if the current token 168 * forces a following operator to be unary */ 169 int code; /* internal code to be returned */ 170 char qchar; /* the delimiter character for a string */ 171 172 e_token = s_token; /* point to start of place to save token */ 173 unary_delim = false; 174 state->col_1 = state->last_nl; /* tell world that this token started 175 * in column 1 iff the last thing 176 * scanned was a newline */ 177 state->last_nl = false; 178 179 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 180 state->col_1 = false; /* leading blanks imply token is not in column 181 * 1 */ 182 if (++buf_ptr >= buf_end) 183 fill_buffer(); 184 } 185 186 /* Scan an alphanumeric token */ 187 if (isalnum((unsigned char)*buf_ptr) || 188 *buf_ptr == '_' || *buf_ptr == '$' || 189 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 190 /* 191 * we have a character or number 192 */ 193 struct templ *p; 194 195 if (isdigit((unsigned char)*buf_ptr) || 196 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 197 char s; 198 unsigned char i; 199 200 for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) { 201 i = (unsigned char)*buf_ptr; 202 if (i >= nitems(table) || table[i] == NULL || 203 table[i][s - 'A'] == ' ') { 204 s = table[0][s - 'A']; 205 break; 206 } 207 s = table[i][s - 'A']; 208 CHECK_SIZE_TOKEN(1); 209 *e_token++ = *buf_ptr++; 210 if (buf_ptr >= buf_end) 211 fill_buffer(); 212 } 213 /* s now indicates the type: f(loating), i(integer), u(nknown) */ 214 } 215 else 216 while (isalnum((unsigned char)*buf_ptr) || 217 *buf_ptr == BACKSLASH || 218 *buf_ptr == '_' || *buf_ptr == '$') { 219 /* fill_buffer() terminates buffer with newline */ 220 if (*buf_ptr == BACKSLASH) { 221 if (*(buf_ptr + 1) == '\n') { 222 buf_ptr += 2; 223 if (buf_ptr >= buf_end) 224 fill_buffer(); 225 } else 226 break; 227 } 228 CHECK_SIZE_TOKEN(1); 229 /* copy it over */ 230 *e_token++ = *buf_ptr++; 231 if (buf_ptr >= buf_end) 232 fill_buffer(); 233 } 234 *e_token = '\0'; 235 236 if (s_token[0] == 'L' && s_token[1] == '\0' && 237 (*buf_ptr == '"' || *buf_ptr == '\'')) 238 return (strpfx); 239 240 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 241 if (++buf_ptr >= buf_end) 242 fill_buffer(); 243 } 244 state->keyword = 0; 245 if (state->last_token == structure && !state->p_l_follow) { 246 /* if last token was 'struct' and we're not 247 * in parentheses, then this token 248 * should be treated as a declaration */ 249 state->last_u_d = true; 250 return (decl); 251 } 252 /* 253 * Operator after identifier is binary unless last token was 'struct' 254 */ 255 state->last_u_d = (state->last_token == structure); 256 257 p = bsearch(s_token, 258 specials, 259 sizeof(specials) / sizeof(specials[0]), 260 sizeof(specials[0]), 261 strcmp_type); 262 if (p == NULL) { /* not a special keyword... */ 263 char *u; 264 265 /* ... so maybe a type_t or a typedef */ 266 if ((opt.auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 267 strcmp(u, "_t") == 0) || (typename_top >= 0 && 268 bsearch(s_token, typenames, typename_top + 1, 269 sizeof(typenames[0]), strcmp_type))) { 270 state->keyword = 4; /* a type name */ 271 state->last_u_d = true; 272 goto found_typename; 273 } 274 } else { /* we have a keyword */ 275 state->keyword = p->rwcode; 276 state->last_u_d = true; 277 switch (p->rwcode) { 278 case 7: /* it is a switch */ 279 return (swstmt); 280 case 8: /* a case or default */ 281 return (casestmt); 282 283 case 3: /* a "struct" */ 284 /* FALLTHROUGH */ 285 case 4: /* one of the declaration keywords */ 286 found_typename: 287 if (state->p_l_follow) { 288 /* inside parens: cast, param list, offsetof or sizeof */ 289 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; 290 } 291 if (state->last_token == period || state->last_token == unary_op) { 292 state->keyword = 0; 293 break; 294 } 295 if (p != NULL && p->rwcode == 3) 296 return (structure); 297 if (state->p_l_follow) 298 break; 299 return (decl); 300 301 case 5: /* if, while, for */ 302 return (sp_paren); 303 304 case 6: /* do, else */ 305 return (sp_nparen); 306 307 case 10: /* storage class specifier */ 308 return (storage); 309 310 case 11: /* typedef */ 311 return (type_def); 312 313 default: /* all others are treated like any other 314 * identifier */ 315 return (ident); 316 } /* end of switch */ 317 } /* end of if (found_it) */ 318 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && 319 state->in_parameter_declaration == 0 && state->block_init == 0) { 320 char *tp = buf_ptr; 321 while (tp < buf_end) 322 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 323 goto not_proc; 324 strncpy(state->procname, token, sizeof state->procname - 1); 325 if (state->in_decl) 326 state->in_parameter_declaration = 1; 327 return (funcname); 328 not_proc:; 329 } 330 /* 331 * The following hack attempts to guess whether or not the current 332 * token is in fact a declaration keyword -- one that has been 333 * typedefd 334 */ 335 else if (!state->p_l_follow && !state->block_init && 336 !state->in_stmt && 337 ((*buf_ptr == '*' && buf_ptr[1] != '=') || 338 isalpha((unsigned char)*buf_ptr)) && 339 (state->last_token == semicolon || state->last_token == lbrace || 340 state->last_token == rbrace)) { 341 state->keyword = 4; /* a type name */ 342 state->last_u_d = true; 343 return decl; 344 } 345 if (state->last_token == decl) /* if this is a declared variable, 346 * then following sign is unary */ 347 state->last_u_d = true; /* will make "int a -1" work */ 348 return (ident); /* the ident is not in the list */ 349 } /* end of processing for alpanum character */ 350 351 /* Scan a non-alphanumeric token */ 352 353 CHECK_SIZE_TOKEN(3); /* things like "<<=" */ 354 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 355 * moved here */ 356 *e_token = '\0'; 357 if (++buf_ptr >= buf_end) 358 fill_buffer(); 359 360 switch (*token) { 361 case '\n': 362 unary_delim = state->last_u_d; 363 state->last_nl = true; /* remember that we just had a newline */ 364 code = (had_eof ? 0 : newline); 365 366 /* 367 * if data has been exhausted, the newline is a dummy, and we should 368 * return code to stop 369 */ 370 break; 371 372 case '\'': /* start of quoted character */ 373 case '"': /* start of string */ 374 qchar = *token; 375 do { /* copy the string */ 376 while (1) { /* move one character or [/<char>]<char> */ 377 if (*buf_ptr == '\n') { 378 diag2(1, "Unterminated literal"); 379 goto stop_lit; 380 } 381 CHECK_SIZE_TOKEN(2); 382 *e_token = *buf_ptr++; 383 if (buf_ptr >= buf_end) 384 fill_buffer(); 385 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 386 if (*buf_ptr == '\n') /* check for escaped newline */ 387 ++line_no; 388 *++e_token = *buf_ptr++; 389 ++e_token; /* we must increment this again because we 390 * copied two chars */ 391 if (buf_ptr >= buf_end) 392 fill_buffer(); 393 } 394 else 395 break; /* we copied one character */ 396 } /* end of while (1) */ 397 } while (*e_token++ != qchar); 398 stop_lit: 399 code = ident; 400 break; 401 402 case ('('): 403 case ('['): 404 unary_delim = true; 405 code = lparen; 406 break; 407 408 case (')'): 409 case (']'): 410 code = rparen; 411 break; 412 413 case '#': 414 unary_delim = state->last_u_d; 415 code = preesc; 416 break; 417 418 case '?': 419 unary_delim = true; 420 code = question; 421 break; 422 423 case (':'): 424 code = colon; 425 unary_delim = true; 426 break; 427 428 case (';'): 429 unary_delim = true; 430 code = semicolon; 431 break; 432 433 case ('{'): 434 unary_delim = true; 435 436 /* 437 * if (state->in_or_st) state->block_init = 1; 438 */ 439 /* ? code = state->block_init ? lparen : lbrace; */ 440 code = lbrace; 441 break; 442 443 case ('}'): 444 unary_delim = true; 445 /* ? code = state->block_init ? rparen : rbrace; */ 446 code = rbrace; 447 break; 448 449 case 014: /* a form feed */ 450 unary_delim = state->last_u_d; 451 state->last_nl = true; /* remember this so we can set 'state->col_1' 452 * right */ 453 code = form_feed; 454 break; 455 456 case (','): 457 unary_delim = true; 458 code = comma; 459 break; 460 461 case '.': 462 unary_delim = false; 463 code = period; 464 break; 465 466 case '-': 467 case '+': /* check for -, +, --, ++ */ 468 code = (state->last_u_d ? unary_op : binary_op); 469 unary_delim = true; 470 471 if (*buf_ptr == token[0]) { 472 /* check for doubled character */ 473 *e_token++ = *buf_ptr++; 474 /* buffer overflow will be checked at end of loop */ 475 if (state->last_token == ident || state->last_token == rparen) { 476 code = (state->last_u_d ? unary_op : postop); 477 /* check for following ++ or -- */ 478 unary_delim = false; 479 } 480 } 481 else if (*buf_ptr == '=') 482 /* check for operator += */ 483 *e_token++ = *buf_ptr++; 484 else if (*buf_ptr == '>') { 485 /* check for operator -> */ 486 *e_token++ = *buf_ptr++; 487 if (!opt.pointer_as_binop) { 488 unary_delim = false; 489 code = unary_op; 490 state->want_blank = false; 491 } 492 } 493 break; /* buffer overflow will be checked at end of 494 * switch */ 495 496 case '=': 497 if (state->in_or_st) 498 state->block_init = 1; 499 if (*buf_ptr == '=') {/* == */ 500 *e_token++ = '='; /* Flip =+ to += */ 501 buf_ptr++; 502 *e_token = 0; 503 } 504 code = binary_op; 505 unary_delim = true; 506 break; 507 /* can drop thru!!! */ 508 509 case '>': 510 case '<': 511 case '!': /* ops like <, <<, <=, !=, etc */ 512 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 513 *e_token++ = *buf_ptr; 514 if (++buf_ptr >= buf_end) 515 fill_buffer(); 516 } 517 if (*buf_ptr == '=') 518 *e_token++ = *buf_ptr++; 519 code = (state->last_u_d ? unary_op : binary_op); 520 unary_delim = true; 521 break; 522 523 case '*': 524 unary_delim = true; 525 if (!state->last_u_d) { 526 if (*buf_ptr == '=') 527 *e_token++ = *buf_ptr++; 528 code = binary_op; 529 break; 530 } 531 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { 532 if (*buf_ptr == '*') { 533 CHECK_SIZE_TOKEN(1); 534 *e_token++ = *buf_ptr; 535 } 536 if (++buf_ptr >= buf_end) 537 fill_buffer(); 538 } 539 if (ps.in_decl) { 540 char *tp = buf_ptr; 541 542 while (isalpha((unsigned char)*tp) || 543 isspace((unsigned char)*tp)) { 544 if (++tp >= buf_end) 545 fill_buffer(); 546 } 547 if (*tp == '(') 548 ps.procname[0] = ' '; 549 } 550 code = unary_op; 551 break; 552 553 default: 554 if (token[0] == '/' && *buf_ptr == '*') { 555 /* it is start of comment */ 556 *e_token++ = '*'; 557 558 if (++buf_ptr >= buf_end) 559 fill_buffer(); 560 561 code = comment; 562 unary_delim = state->last_u_d; 563 break; 564 } 565 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 566 /* 567 * handle ||, &&, etc, and also things as in int *****i 568 */ 569 CHECK_SIZE_TOKEN(1); 570 *e_token++ = *buf_ptr; 571 if (++buf_ptr >= buf_end) 572 fill_buffer(); 573 } 574 code = (state->last_u_d ? unary_op : binary_op); 575 unary_delim = true; 576 577 578 } /* end of switch */ 579 if (buf_ptr >= buf_end) /* check for input buffer empty */ 580 fill_buffer(); 581 state->last_u_d = unary_delim; 582 CHECK_SIZE_TOKEN(1); 583 *e_token = '\0'; /* null terminate the token */ 584 return (code); 585 } 586 587 /* Initialize constant transition table */ 588 void 589 init_constant_tt(void) 590 { 591 table['-'] = table['+']; 592 table['8'] = table['9']; 593 table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7']; 594 table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a']; 595 table['B'] = table['b']; 596 table['E'] = table['e']; 597 table['U'] = table['u']; 598 table['X'] = table['x']; 599 table['P'] = table['p']; 600 table['F'] = table['f']; 601 } 602 603 void 604 alloc_typenames(void) 605 { 606 607 typenames = (const char **)malloc(sizeof(typenames[0]) * 608 (typename_count = 16)); 609 if (typenames == NULL) 610 err(1, NULL); 611 } 612 613 void 614 add_typename(const char *key) 615 { 616 int comparison; 617 const char *copy; 618 619 if (typename_top + 1 >= typename_count) { 620 typenames = realloc((void *)typenames, 621 sizeof(typenames[0]) * (typename_count *= 2)); 622 if (typenames == NULL) 623 err(1, NULL); 624 } 625 if (typename_top == -1) 626 typenames[++typename_top] = copy = strdup(key); 627 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 628 /* take advantage of sorted input */ 629 if (comparison == 0) /* remove duplicates */ 630 return; 631 typenames[++typename_top] = copy = strdup(key); 632 } 633 else { 634 int p; 635 636 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 637 /* find place for the new key */; 638 if (comparison == 0) /* remove duplicates */ 639 return; 640 memmove(&typenames[p + 1], &typenames[p], 641 sizeof(typenames[0]) * (++typename_top - p)); 642 typenames[p] = copy = strdup(key); 643 } 644 645 if (copy == NULL) 646 err(1, NULL); 647 } 648