1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1980, 1993 6 * The Regents of the University of California. All rights reserved. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #endif 40 #include <sys/cdefs.h> 41 /* 42 * Here we have the token scanner for indent. It scans off one token and puts 43 * it in the global variable "token". It returns a code, indicating the type 44 * of token scanned. 45 */ 46 47 #include <err.h> 48 #include <stdio.h> 49 #include <ctype.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <sys/param.h> 53 54 #include "indent_globs.h" 55 #include "indent_codes.h" 56 #include "indent.h" 57 58 struct templ { 59 const char *rwd; 60 int rwcode; 61 }; 62 63 /* 64 * This table has to be sorted alphabetically, because it'll be used in binary 65 * search. For the same reason, string must be the first thing in struct templ. 66 */ 67 struct templ specials[] = 68 { 69 {"_Bool", 4}, 70 {"_Complex", 4}, 71 {"_Imaginary", 4}, 72 {"auto", 10}, 73 {"bool", 4}, 74 {"break", 9}, 75 {"case", 8}, 76 {"char", 4}, 77 {"complex", 4}, 78 {"const", 4}, 79 {"continue", 12}, 80 {"default", 8}, 81 {"do", 6}, 82 {"double", 4}, 83 {"else", 6}, 84 {"enum", 3}, 85 {"extern", 10}, 86 {"float", 4}, 87 {"for", 5}, 88 {"global", 4}, 89 {"goto", 9}, 90 {"if", 5}, 91 {"imaginary", 4}, 92 {"inline", 12}, 93 {"int", 4}, 94 {"long", 4}, 95 {"offsetof", 1}, 96 {"register", 10}, 97 {"restrict", 12}, 98 {"return", 9}, 99 {"short", 4}, 100 {"signed", 4}, 101 {"sizeof", 2}, 102 {"static", 10}, 103 {"struct", 3}, 104 {"switch", 7}, 105 {"typedef", 11}, 106 {"union", 3}, 107 {"unsigned", 4}, 108 {"void", 4}, 109 {"volatile", 4}, 110 {"while", 5} 111 }; 112 113 const char **typenames; 114 int typename_count; 115 int typename_top = -1; 116 117 /* 118 * The transition table below was rewritten by hand from lx's output, given 119 * the following definitions. lx is Katherine Flavel's lexer generator. 120 * 121 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; 122 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; 123 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; 124 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; 125 * 126 * D+ E FS? -> $float; 127 * D* "." D+ E? FS? -> $float; 128 * D+ "." E? FS? -> $float; HP H+ IS? -> $int; 129 * HP H+ P FS? -> $float; NZ D* IS? -> $int; 130 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; 131 * HP H+ "." P FS -> $float; BP B+ IS? -> $int; 132 */ 133 static char const *table[] = { 134 /* examples: 135 00 136 s 0xx 137 t 00xaa 138 a 11 101100xxa.. 139 r 11ee0001101lbuuxx.a.pp 140 t.01.e+008bLuxll0Ll.aa.p+0 141 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 142 ['0'] = "CEIDEHHHIJQ U Q VUVVZZZ", 143 ['1'] = "DEIDEHHHIJQ U Q VUVVZZZ", 144 ['7'] = "DEIDEHHHIJ U VUVVZZZ", 145 ['9'] = "DEJDEHHHJJ U VUVVZZZ", 146 ['a'] = " U VUVV ", 147 ['b'] = " K U VUVV ", 148 ['e'] = " FFF FF U VUVV ", 149 ['f'] = " f f U VUVV f", 150 ['u'] = " MM M i iiM M ", 151 ['x'] = " N ", 152 ['p'] = " FFX ", 153 ['L'] = " LLf fL PR Li L f", 154 ['l'] = " OOf fO S P O i O f", 155 ['+'] = " G Y ", 156 ['.'] = "B EE EE T W ", 157 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 158 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", 159 }; 160 161 static int 162 strcmp_type(const void *e1, const void *e2) 163 { 164 return (strcmp(e1, *(const char * const *)e2)); 165 } 166 167 int 168 lexi(struct parser_state *state) 169 { 170 int unary_delim; /* this is set to 1 if the current token 171 * forces a following operator to be unary */ 172 int code; /* internal code to be returned */ 173 char qchar; /* the delimiter character for a string */ 174 175 e_token = s_token; /* point to start of place to save token */ 176 unary_delim = false; 177 state->col_1 = state->last_nl; /* tell world that this token started 178 * in column 1 iff the last thing 179 * scanned was a newline */ 180 state->last_nl = false; 181 182 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 183 state->col_1 = false; /* leading blanks imply token is not in column 184 * 1 */ 185 if (++buf_ptr >= buf_end) 186 fill_buffer(); 187 } 188 189 /* Scan an alphanumeric token */ 190 if (isalnum((unsigned char)*buf_ptr) || 191 *buf_ptr == '_' || *buf_ptr == '$' || 192 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 193 /* 194 * we have a character or number 195 */ 196 struct templ *p; 197 198 if (isdigit((unsigned char)*buf_ptr) || 199 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { 200 char s; 201 unsigned char i; 202 203 for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) { 204 i = (unsigned char)*buf_ptr; 205 if (i >= nitems(table) || table[i] == NULL || 206 table[i][s - 'A'] == ' ') { 207 s = table[0][s - 'A']; 208 break; 209 } 210 s = table[i][s - 'A']; 211 CHECK_SIZE_TOKEN(1); 212 *e_token++ = *buf_ptr++; 213 if (buf_ptr >= buf_end) 214 fill_buffer(); 215 } 216 /* s now indicates the type: f(loating), i(integer), u(nknown) */ 217 } 218 else 219 while (isalnum((unsigned char)*buf_ptr) || 220 *buf_ptr == BACKSLASH || 221 *buf_ptr == '_' || *buf_ptr == '$') { 222 /* fill_buffer() terminates buffer with newline */ 223 if (*buf_ptr == BACKSLASH) { 224 if (*(buf_ptr + 1) == '\n') { 225 buf_ptr += 2; 226 if (buf_ptr >= buf_end) 227 fill_buffer(); 228 } else 229 break; 230 } 231 CHECK_SIZE_TOKEN(1); 232 /* copy it over */ 233 *e_token++ = *buf_ptr++; 234 if (buf_ptr >= buf_end) 235 fill_buffer(); 236 } 237 *e_token = '\0'; 238 239 if (s_token[0] == 'L' && s_token[1] == '\0' && 240 (*buf_ptr == '"' || *buf_ptr == '\'')) 241 return (strpfx); 242 243 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 244 if (++buf_ptr >= buf_end) 245 fill_buffer(); 246 } 247 state->keyword = 0; 248 if (state->last_token == structure && !state->p_l_follow) { 249 /* if last token was 'struct' and we're not 250 * in parentheses, then this token 251 * should be treated as a declaration */ 252 state->last_u_d = true; 253 return (decl); 254 } 255 /* 256 * Operator after identifier is binary unless last token was 'struct' 257 */ 258 state->last_u_d = (state->last_token == structure); 259 260 p = bsearch(s_token, 261 specials, 262 sizeof(specials) / sizeof(specials[0]), 263 sizeof(specials[0]), 264 strcmp_type); 265 if (p == NULL) { /* not a special keyword... */ 266 char *u; 267 268 /* ... so maybe a type_t or a typedef */ 269 if ((opt.auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 270 strcmp(u, "_t") == 0) || (typename_top >= 0 && 271 bsearch(s_token, typenames, typename_top + 1, 272 sizeof(typenames[0]), strcmp_type))) { 273 state->keyword = 4; /* a type name */ 274 state->last_u_d = true; 275 goto found_typename; 276 } 277 } else { /* we have a keyword */ 278 state->keyword = p->rwcode; 279 state->last_u_d = true; 280 switch (p->rwcode) { 281 case 7: /* it is a switch */ 282 return (swstmt); 283 case 8: /* a case or default */ 284 return (casestmt); 285 286 case 3: /* a "struct" */ 287 /* FALLTHROUGH */ 288 case 4: /* one of the declaration keywords */ 289 found_typename: 290 if (state->p_l_follow) { 291 /* inside parens: cast, param list, offsetof or sizeof */ 292 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; 293 } 294 if (state->last_token == period || state->last_token == unary_op) { 295 state->keyword = 0; 296 break; 297 } 298 if (p != NULL && p->rwcode == 3) 299 return (structure); 300 if (state->p_l_follow) 301 break; 302 return (decl); 303 304 case 5: /* if, while, for */ 305 return (sp_paren); 306 307 case 6: /* do, else */ 308 return (sp_nparen); 309 310 case 10: /* storage class specifier */ 311 return (storage); 312 313 case 11: /* typedef */ 314 return (type_def); 315 316 default: /* all others are treated like any other 317 * identifier */ 318 return (ident); 319 } /* end of switch */ 320 } /* end of if (found_it) */ 321 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && 322 state->in_parameter_declaration == 0 && state->block_init == 0) { 323 char *tp = buf_ptr; 324 while (tp < buf_end) 325 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 326 goto not_proc; 327 strncpy(state->procname, token, sizeof state->procname - 1); 328 if (state->in_decl) 329 state->in_parameter_declaration = 1; 330 return (funcname); 331 not_proc:; 332 } 333 /* 334 * The following hack attempts to guess whether or not the current 335 * token is in fact a declaration keyword -- one that has been 336 * typedefd 337 */ 338 else if (!state->p_l_follow && !state->block_init && 339 !state->in_stmt && 340 ((*buf_ptr == '*' && buf_ptr[1] != '=') || 341 isalpha((unsigned char)*buf_ptr)) && 342 (state->last_token == semicolon || state->last_token == lbrace || 343 state->last_token == rbrace)) { 344 state->keyword = 4; /* a type name */ 345 state->last_u_d = true; 346 return decl; 347 } 348 if (state->last_token == decl) /* if this is a declared variable, 349 * then following sign is unary */ 350 state->last_u_d = true; /* will make "int a -1" work */ 351 return (ident); /* the ident is not in the list */ 352 } /* end of processing for alpanum character */ 353 354 /* Scan a non-alphanumeric token */ 355 356 CHECK_SIZE_TOKEN(3); /* things like "<<=" */ 357 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 358 * moved here */ 359 *e_token = '\0'; 360 if (++buf_ptr >= buf_end) 361 fill_buffer(); 362 363 switch (*token) { 364 case '\n': 365 unary_delim = state->last_u_d; 366 state->last_nl = true; /* remember that we just had a newline */ 367 code = (had_eof ? 0 : newline); 368 369 /* 370 * if data has been exhausted, the newline is a dummy, and we should 371 * return code to stop 372 */ 373 break; 374 375 case '\'': /* start of quoted character */ 376 case '"': /* start of string */ 377 qchar = *token; 378 do { /* copy the string */ 379 while (1) { /* move one character or [/<char>]<char> */ 380 if (*buf_ptr == '\n') { 381 diag2(1, "Unterminated literal"); 382 goto stop_lit; 383 } 384 CHECK_SIZE_TOKEN(2); 385 *e_token = *buf_ptr++; 386 if (buf_ptr >= buf_end) 387 fill_buffer(); 388 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 389 if (*buf_ptr == '\n') /* check for escaped newline */ 390 ++line_no; 391 *++e_token = *buf_ptr++; 392 ++e_token; /* we must increment this again because we 393 * copied two chars */ 394 if (buf_ptr >= buf_end) 395 fill_buffer(); 396 } 397 else 398 break; /* we copied one character */ 399 } /* end of while (1) */ 400 } while (*e_token++ != qchar); 401 stop_lit: 402 code = ident; 403 break; 404 405 case ('('): 406 case ('['): 407 unary_delim = true; 408 code = lparen; 409 break; 410 411 case (')'): 412 case (']'): 413 code = rparen; 414 break; 415 416 case '#': 417 unary_delim = state->last_u_d; 418 code = preesc; 419 break; 420 421 case '?': 422 unary_delim = true; 423 code = question; 424 break; 425 426 case (':'): 427 code = colon; 428 unary_delim = true; 429 break; 430 431 case (';'): 432 unary_delim = true; 433 code = semicolon; 434 break; 435 436 case ('{'): 437 unary_delim = true; 438 439 /* 440 * if (state->in_or_st) state->block_init = 1; 441 */ 442 /* ? code = state->block_init ? lparen : lbrace; */ 443 code = lbrace; 444 break; 445 446 case ('}'): 447 unary_delim = true; 448 /* ? code = state->block_init ? rparen : rbrace; */ 449 code = rbrace; 450 break; 451 452 case 014: /* a form feed */ 453 unary_delim = state->last_u_d; 454 state->last_nl = true; /* remember this so we can set 'state->col_1' 455 * right */ 456 code = form_feed; 457 break; 458 459 case (','): 460 unary_delim = true; 461 code = comma; 462 break; 463 464 case '.': 465 unary_delim = false; 466 code = period; 467 break; 468 469 case '-': 470 case '+': /* check for -, +, --, ++ */ 471 code = (state->last_u_d ? unary_op : binary_op); 472 unary_delim = true; 473 474 if (*buf_ptr == token[0]) { 475 /* check for doubled character */ 476 *e_token++ = *buf_ptr++; 477 /* buffer overflow will be checked at end of loop */ 478 if (state->last_token == ident || state->last_token == rparen) { 479 code = (state->last_u_d ? unary_op : postop); 480 /* check for following ++ or -- */ 481 unary_delim = false; 482 } 483 } 484 else if (*buf_ptr == '=') 485 /* check for operator += */ 486 *e_token++ = *buf_ptr++; 487 else if (*buf_ptr == '>') { 488 /* check for operator -> */ 489 *e_token++ = *buf_ptr++; 490 if (!opt.pointer_as_binop) { 491 unary_delim = false; 492 code = unary_op; 493 state->want_blank = false; 494 } 495 } 496 break; /* buffer overflow will be checked at end of 497 * switch */ 498 499 case '=': 500 if (state->in_or_st) 501 state->block_init = 1; 502 if (*buf_ptr == '=') {/* == */ 503 *e_token++ = '='; /* Flip =+ to += */ 504 buf_ptr++; 505 *e_token = 0; 506 } 507 code = binary_op; 508 unary_delim = true; 509 break; 510 /* can drop thru!!! */ 511 512 case '>': 513 case '<': 514 case '!': /* ops like <, <<, <=, !=, etc */ 515 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 516 *e_token++ = *buf_ptr; 517 if (++buf_ptr >= buf_end) 518 fill_buffer(); 519 } 520 if (*buf_ptr == '=') 521 *e_token++ = *buf_ptr++; 522 code = (state->last_u_d ? unary_op : binary_op); 523 unary_delim = true; 524 break; 525 526 case '*': 527 unary_delim = true; 528 if (!state->last_u_d) { 529 if (*buf_ptr == '=') 530 *e_token++ = *buf_ptr++; 531 code = binary_op; 532 break; 533 } 534 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { 535 if (*buf_ptr == '*') { 536 CHECK_SIZE_TOKEN(1); 537 *e_token++ = *buf_ptr; 538 } 539 if (++buf_ptr >= buf_end) 540 fill_buffer(); 541 } 542 if (ps.in_decl) { 543 char *tp = buf_ptr; 544 545 while (isalpha((unsigned char)*tp) || 546 isspace((unsigned char)*tp)) { 547 if (++tp >= buf_end) 548 fill_buffer(); 549 } 550 if (*tp == '(') 551 ps.procname[0] = ' '; 552 } 553 code = unary_op; 554 break; 555 556 default: 557 if (token[0] == '/' && *buf_ptr == '*') { 558 /* it is start of comment */ 559 *e_token++ = '*'; 560 561 if (++buf_ptr >= buf_end) 562 fill_buffer(); 563 564 code = comment; 565 unary_delim = state->last_u_d; 566 break; 567 } 568 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 569 /* 570 * handle ||, &&, etc, and also things as in int *****i 571 */ 572 CHECK_SIZE_TOKEN(1); 573 *e_token++ = *buf_ptr; 574 if (++buf_ptr >= buf_end) 575 fill_buffer(); 576 } 577 code = (state->last_u_d ? unary_op : binary_op); 578 unary_delim = true; 579 580 581 } /* end of switch */ 582 if (buf_ptr >= buf_end) /* check for input buffer empty */ 583 fill_buffer(); 584 state->last_u_d = unary_delim; 585 CHECK_SIZE_TOKEN(1); 586 *e_token = '\0'; /* null terminate the token */ 587 return (code); 588 } 589 590 /* Initialize constant transition table */ 591 void 592 init_constant_tt(void) 593 { 594 table['-'] = table['+']; 595 table['8'] = table['9']; 596 table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7']; 597 table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a']; 598 table['B'] = table['b']; 599 table['E'] = table['e']; 600 table['U'] = table['u']; 601 table['X'] = table['x']; 602 table['P'] = table['p']; 603 table['F'] = table['f']; 604 } 605 606 void 607 alloc_typenames(void) 608 { 609 610 typenames = (const char **)malloc(sizeof(typenames[0]) * 611 (typename_count = 16)); 612 if (typenames == NULL) 613 err(1, NULL); 614 } 615 616 void 617 add_typename(const char *key) 618 { 619 int comparison; 620 const char *copy; 621 622 if (typename_top + 1 >= typename_count) { 623 typenames = realloc((void *)typenames, 624 sizeof(typenames[0]) * (typename_count *= 2)); 625 if (typenames == NULL) 626 err(1, NULL); 627 } 628 if (typename_top == -1) 629 typenames[++typename_top] = copy = strdup(key); 630 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 631 /* take advantage of sorted input */ 632 if (comparison == 0) /* remove duplicates */ 633 return; 634 typenames[++typename_top] = copy = strdup(key); 635 } 636 else { 637 int p; 638 639 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 640 /* find place for the new key */; 641 if (comparison == 0) /* remove duplicates */ 642 return; 643 memmove(&typenames[p + 1], &typenames[p], 644 sizeof(typenames[0]) * (++typename_top - p)); 645 typenames[p] = copy = strdup(key); 646 } 647 648 if (copy == NULL) 649 err(1, NULL); 650 } 651