1 /*- 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <err.h> 51 #include <stdio.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include "indent_globs.h" 56 #include "indent_codes.h" 57 #include "indent.h" 58 59 #define alphanum 1 60 #define opchar 3 61 62 struct templ { 63 const char *rwd; 64 int rwcode; 65 }; 66 67 /* 68 * This table has to be sorted alphabetically, because it'll be used in binary 69 * search. For the same reason, string must be the first thing in struct templ. 70 */ 71 struct templ specials[] = 72 { 73 {"break", 9}, 74 {"case", 8}, 75 {"char", 4}, 76 {"const", 4}, 77 {"default", 8}, 78 {"do", 6}, 79 {"double", 4}, 80 {"else", 6}, 81 {"enum", 3}, 82 {"extern", 4}, 83 {"float", 4}, 84 {"for", 5}, 85 {"global", 4}, 86 {"goto", 9}, 87 {"if", 5}, 88 {"int", 4}, 89 {"long", 4}, 90 {"offsetof", 1}, 91 {"register", 4}, 92 {"return", 9}, 93 {"short", 4}, 94 {"sizeof", 2}, 95 {"static", 4}, 96 {"struct", 3}, 97 {"switch", 7}, 98 {"typedef", 4}, 99 {"union", 3}, 100 {"unsigned", 4}, 101 {"void", 4}, 102 {"volatile", 4}, 103 {"while", 5} 104 }; 105 106 const char **typenames; 107 int typename_count; 108 int typename_top = -1; 109 110 char chartype[128] = 111 { /* this is used to facilitate the decision of 112 * what type (alphanumeric, operator) each 113 * character is */ 114 0, 0, 0, 0, 0, 0, 0, 0, 115 0, 0, 0, 0, 0, 0, 0, 0, 116 0, 0, 0, 0, 0, 0, 0, 0, 117 0, 0, 0, 0, 0, 0, 0, 0, 118 0, 3, 0, 0, 1, 3, 3, 0, 119 0, 0, 3, 3, 0, 3, 0, 3, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 0, 0, 3, 3, 3, 3, 122 0, 1, 1, 1, 1, 1, 1, 1, 123 1, 1, 1, 1, 1, 1, 1, 1, 124 1, 1, 1, 1, 1, 1, 1, 1, 125 1, 1, 1, 0, 0, 0, 3, 1, 126 0, 1, 1, 1, 1, 1, 1, 1, 127 1, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 1, 1, 1, 1, 1, 129 1, 1, 1, 0, 3, 0, 3, 0 130 }; 131 132 static int 133 strcmp_type(const void *e1, const void *e2) 134 { 135 return (strcmp(e1, *(const char * const *)e2)); 136 } 137 138 int 139 lexi(void) 140 { 141 int unary_delim; /* this is set to 1 if the current token 142 * forces a following operator to be unary */ 143 static int last_code; /* the last token type returned */ 144 static int l_struct; /* set to 1 if the last token was 'struct' */ 145 int code; /* internal code to be returned */ 146 char qchar; /* the delimiter character for a string */ 147 148 e_token = s_token; /* point to start of place to save token */ 149 unary_delim = false; 150 ps.col_1 = ps.last_nl; /* tell world that this token started in 151 * column 1 iff the last thing scanned was nl */ 152 ps.last_nl = false; 153 154 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 155 ps.col_1 = false; /* leading blanks imply token is not in column 156 * 1 */ 157 if (++buf_ptr >= buf_end) 158 fill_buffer(); 159 } 160 161 /* Scan an alphanumeric token */ 162 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 163 /* 164 * we have a character or number 165 */ 166 struct templ *p; 167 168 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 169 int seendot = 0, 170 seenexp = 0, 171 seensfx = 0; 172 if (*buf_ptr == '0' && 173 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 174 *e_token++ = *buf_ptr++; 175 *e_token++ = *buf_ptr++; 176 while (isxdigit(*buf_ptr)) { 177 CHECK_SIZE_TOKEN; 178 *e_token++ = *buf_ptr++; 179 } 180 } 181 else 182 while (1) { 183 if (*buf_ptr == '.') { 184 if (seendot) 185 break; 186 else 187 seendot++; 188 } 189 CHECK_SIZE_TOKEN; 190 *e_token++ = *buf_ptr++; 191 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 192 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 193 break; 194 else { 195 seenexp++; 196 seendot++; 197 CHECK_SIZE_TOKEN; 198 *e_token++ = *buf_ptr++; 199 if (*buf_ptr == '+' || *buf_ptr == '-') 200 *e_token++ = *buf_ptr++; 201 } 202 } 203 } 204 while (1) { 205 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 206 CHECK_SIZE_TOKEN; 207 *e_token++ = *buf_ptr++; 208 seensfx |= 1; 209 continue; 210 } 211 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 212 CHECK_SIZE_TOKEN; 213 if (buf_ptr[1] == buf_ptr[0]) 214 *e_token++ = *buf_ptr++; 215 *e_token++ = *buf_ptr++; 216 seensfx |= 2; 217 continue; 218 } 219 break; 220 } 221 } 222 else 223 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 224 /* fill_buffer() terminates buffer with newline */ 225 if (*buf_ptr == BACKSLASH) { 226 if (*(buf_ptr + 1) == '\n') { 227 buf_ptr += 2; 228 if (buf_ptr >= buf_end) 229 fill_buffer(); 230 } else 231 break; 232 } 233 CHECK_SIZE_TOKEN; 234 /* copy it over */ 235 *e_token++ = *buf_ptr++; 236 if (buf_ptr >= buf_end) 237 fill_buffer(); 238 } 239 *e_token++ = '\0'; 240 241 if (s_token[0] == 'L' && s_token[1] == '\0' && 242 (*buf_ptr == '"' || *buf_ptr == '\'')) 243 return (strpfx); 244 245 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 246 if (++buf_ptr >= buf_end) 247 fill_buffer(); 248 } 249 ps.keyword = 0; 250 if (l_struct && !ps.p_l_follow) { 251 /* if last token was 'struct' and we're not 252 * in parentheses, then this token 253 * should be treated as a declaration */ 254 l_struct = false; 255 last_code = ident; 256 ps.last_u_d = true; 257 return (decl); 258 } 259 ps.last_u_d = l_struct; /* Operator after identifier is binary 260 * unless last token was 'struct' */ 261 l_struct = false; 262 last_code = ident; /* Remember that this is the code we will 263 * return */ 264 265 p = bsearch(s_token, 266 specials, 267 sizeof(specials) / sizeof(specials[0]), 268 sizeof(specials[0]), 269 strcmp_type); 270 if (p == NULL) { /* not a special keyword... */ 271 char *u; 272 273 /* ... so maybe a type_t or a typedef */ 274 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 275 strcmp(u, "_t") == 0) || (typename_top >= 0 && 276 bsearch(s_token, typenames, typename_top + 1, 277 sizeof(typenames[0]), strcmp_type))) { 278 ps.keyword = 4; /* a type name */ 279 ps.last_u_d = true; 280 goto found_typename; 281 } 282 } else { /* we have a keyword */ 283 ps.keyword = p->rwcode; 284 ps.last_u_d = true; 285 switch (p->rwcode) { 286 case 7: /* it is a switch */ 287 return (swstmt); 288 case 8: /* a case or default */ 289 return (casestmt); 290 291 case 3: /* a "struct" */ 292 /* 293 * Next time around, we will want to know that we have had a 294 * 'struct' 295 */ 296 l_struct = true; 297 /* FALLTHROUGH */ 298 299 case 4: /* one of the declaration keywords */ 300 found_typename: 301 if (ps.p_l_follow) { 302 /* inside parens: cast, param list, offsetof or sizeof */ 303 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; 304 break; 305 } 306 last_code = decl; 307 return (decl); 308 309 case 5: /* if, while, for */ 310 return (sp_paren); 311 312 case 6: /* do, else */ 313 return (sp_nparen); 314 315 default: /* all others are treated like any other 316 * identifier */ 317 return (ident); 318 } /* end of switch */ 319 } /* end of if (found_it) */ 320 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 321 char *tp = buf_ptr; 322 while (tp < buf_end) 323 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 324 goto not_proc; 325 strncpy(ps.procname, token, sizeof ps.procname - 1); 326 ps.in_parameter_declaration = 1; 327 rparen_count = 1; 328 not_proc:; 329 } 330 /* 331 * The following hack attempts to guess whether or not the current 332 * token is in fact a declaration keyword -- one that has been 333 * typedefd 334 */ 335 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 336 && !ps.p_l_follow 337 && !ps.block_init 338 && (ps.last_token == rparen || ps.last_token == semicolon || 339 ps.last_token == decl || 340 ps.last_token == lbrace || ps.last_token == rbrace)) { 341 ps.keyword = 4; /* a type name */ 342 ps.last_u_d = true; 343 last_code = decl; 344 return decl; 345 } 346 if (last_code == decl) /* if this is a declared variable, then 347 * following sign is unary */ 348 ps.last_u_d = true; /* will make "int a -1" work */ 349 last_code = ident; 350 return (ident); /* the ident is not in the list */ 351 } /* end of procesing for alpanum character */ 352 353 /* Scan a non-alphanumeric token */ 354 355 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 356 * moved here */ 357 *e_token = '\0'; 358 if (++buf_ptr >= buf_end) 359 fill_buffer(); 360 361 switch (*token) { 362 case '\n': 363 unary_delim = ps.last_u_d; 364 ps.last_nl = true; /* remember that we just had a newline */ 365 code = (had_eof ? 0 : newline); 366 367 /* 368 * if data has been exhausted, the newline is a dummy, and we should 369 * return code to stop 370 */ 371 break; 372 373 case '\'': /* start of quoted character */ 374 case '"': /* start of string */ 375 qchar = *token; 376 if (troff) { 377 e_token[-1] = '`'; 378 if (qchar == '"') 379 *e_token++ = '`'; 380 e_token = chfont(&bodyf, &stringf, e_token); 381 } 382 do { /* copy the string */ 383 while (1) { /* move one character or [/<char>]<char> */ 384 if (*buf_ptr == '\n') { 385 diag2(1, "Unterminated literal"); 386 goto stop_lit; 387 } 388 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 389 * since CHECK_SIZE guarantees that there 390 * are at least 5 entries left */ 391 *e_token = *buf_ptr++; 392 if (buf_ptr >= buf_end) 393 fill_buffer(); 394 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 395 if (*buf_ptr == '\n') /* check for escaped newline */ 396 ++line_no; 397 if (troff) { 398 *++e_token = BACKSLASH; 399 if (*buf_ptr == BACKSLASH) 400 *++e_token = BACKSLASH; 401 } 402 *++e_token = *buf_ptr++; 403 ++e_token; /* we must increment this again because we 404 * copied two chars */ 405 if (buf_ptr >= buf_end) 406 fill_buffer(); 407 } 408 else 409 break; /* we copied one character */ 410 } /* end of while (1) */ 411 } while (*e_token++ != qchar); 412 if (troff) { 413 e_token = chfont(&stringf, &bodyf, e_token - 1); 414 if (qchar == '"') 415 *e_token++ = '\''; 416 } 417 stop_lit: 418 code = ident; 419 break; 420 421 case ('('): 422 case ('['): 423 unary_delim = true; 424 code = lparen; 425 break; 426 427 case (')'): 428 case (']'): 429 code = rparen; 430 break; 431 432 case '#': 433 unary_delim = ps.last_u_d; 434 code = preesc; 435 break; 436 437 case '?': 438 unary_delim = true; 439 code = question; 440 break; 441 442 case (':'): 443 code = colon; 444 unary_delim = true; 445 break; 446 447 case (';'): 448 unary_delim = true; 449 code = semicolon; 450 break; 451 452 case ('{'): 453 unary_delim = true; 454 455 /* 456 * if (ps.in_or_st) ps.block_init = 1; 457 */ 458 /* ? code = ps.block_init ? lparen : lbrace; */ 459 code = lbrace; 460 break; 461 462 case ('}'): 463 unary_delim = true; 464 /* ? code = ps.block_init ? rparen : rbrace; */ 465 code = rbrace; 466 break; 467 468 case 014: /* a form feed */ 469 unary_delim = ps.last_u_d; 470 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 471 * right */ 472 code = form_feed; 473 break; 474 475 case (','): 476 unary_delim = true; 477 code = comma; 478 break; 479 480 case '.': 481 unary_delim = false; 482 code = period; 483 break; 484 485 case '-': 486 case '+': /* check for -, +, --, ++ */ 487 code = (ps.last_u_d ? unary_op : binary_op); 488 unary_delim = true; 489 490 if (*buf_ptr == token[0]) { 491 /* check for doubled character */ 492 *e_token++ = *buf_ptr++; 493 /* buffer overflow will be checked at end of loop */ 494 if (last_code == ident || last_code == rparen) { 495 code = (ps.last_u_d ? unary_op : postop); 496 /* check for following ++ or -- */ 497 unary_delim = false; 498 } 499 } 500 else if (*buf_ptr == '=') 501 /* check for operator += */ 502 *e_token++ = *buf_ptr++; 503 else if (*buf_ptr == '>') { 504 /* check for operator -> */ 505 *e_token++ = *buf_ptr++; 506 if (!pointer_as_binop) { 507 unary_delim = false; 508 code = unary_op; 509 ps.want_blank = false; 510 } 511 } 512 break; /* buffer overflow will be checked at end of 513 * switch */ 514 515 case '=': 516 if (ps.in_or_st) 517 ps.block_init = 1; 518 #ifdef undef 519 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 520 e_token[-1] = *buf_ptr++; 521 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 522 *e_token++ = *buf_ptr++; 523 *e_token++ = '='; /* Flip =+ to += */ 524 *e_token = 0; 525 } 526 #else 527 if (*buf_ptr == '=') {/* == */ 528 *e_token++ = '='; /* Flip =+ to += */ 529 buf_ptr++; 530 *e_token = 0; 531 } 532 #endif 533 code = binary_op; 534 unary_delim = true; 535 break; 536 /* can drop thru!!! */ 537 538 case '>': 539 case '<': 540 case '!': /* ops like <, <<, <=, !=, etc */ 541 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 542 *e_token++ = *buf_ptr; 543 if (++buf_ptr >= buf_end) 544 fill_buffer(); 545 } 546 if (*buf_ptr == '=') 547 *e_token++ = *buf_ptr++; 548 code = (ps.last_u_d ? unary_op : binary_op); 549 unary_delim = true; 550 break; 551 552 default: 553 if (token[0] == '/' && *buf_ptr == '*') { 554 /* it is start of comment */ 555 *e_token++ = '*'; 556 557 if (++buf_ptr >= buf_end) 558 fill_buffer(); 559 560 code = comment; 561 unary_delim = ps.last_u_d; 562 break; 563 } 564 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 565 /* 566 * handle ||, &&, etc, and also things as in int *****i 567 */ 568 *e_token++ = *buf_ptr; 569 if (++buf_ptr >= buf_end) 570 fill_buffer(); 571 } 572 code = (ps.last_u_d ? unary_op : binary_op); 573 unary_delim = true; 574 575 576 } /* end of switch */ 577 if (code != newline) { 578 l_struct = false; 579 last_code = code; 580 } 581 if (buf_ptr >= buf_end) /* check for input buffer empty */ 582 fill_buffer(); 583 ps.last_u_d = unary_delim; 584 *e_token = '\0'; /* null terminate the token */ 585 return (code); 586 } 587 588 void 589 alloc_typenames(void) 590 { 591 592 typenames = (const char **)malloc(sizeof(typenames[0]) * 593 (typename_count = 16)); 594 if (typenames == NULL) 595 err(1, NULL); 596 } 597 598 void 599 add_typename(const char *key) 600 { 601 int comparison; 602 const char *copy; 603 604 if (typename_top + 1 >= typename_count) { 605 typenames = realloc((void *)typenames, 606 sizeof(typenames[0]) * (typename_count *= 2)); 607 if (typenames == NULL) 608 err(1, NULL); 609 } 610 if (typename_top == -1) 611 typenames[++typename_top] = copy = strdup(key); 612 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 613 /* take advantage of sorted input */ 614 if (comparison == 0) /* remove duplicates */ 615 return; 616 typenames[++typename_top] = copy = strdup(key); 617 } 618 else { 619 int p; 620 621 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 622 /* find place for the new key */; 623 if (comparison == 0) /* remove duplicates */ 624 return; 625 memmove(&typenames[p + 1], &typenames[p], 626 sizeof(typenames[0]) * (++typename_top - p)); 627 typenames[p] = copy = strdup(key); 628 } 629 630 if (copy == NULL) 631 err(1, NULL); 632 } 633