1 /*- 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <err.h> 51 #include <stdio.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include "indent_globs.h" 56 #include "indent_codes.h" 57 #include "indent.h" 58 59 #define alphanum 1 60 #define opchar 3 61 62 struct templ { 63 const char *rwd; 64 int rwcode; 65 }; 66 67 /* 68 * This table has to be sorted alphabetically, because it'll be used in binary 69 * search. For the same reason, string must be the first thing in struct templ. 70 */ 71 struct templ specials[] = 72 { 73 {"break", 9}, 74 {"case", 8}, 75 {"char", 4}, 76 {"const", 4}, 77 {"default", 8}, 78 {"do", 6}, 79 {"double", 4}, 80 {"else", 6}, 81 {"enum", 3}, 82 {"extern", 4}, 83 {"float", 4}, 84 {"for", 5}, 85 {"global", 4}, 86 {"goto", 9}, 87 {"if", 5}, 88 {"int", 4}, 89 {"long", 4}, 90 {"offsetof", 1}, 91 {"register", 4}, 92 {"return", 9}, 93 {"short", 4}, 94 {"sizeof", 2}, 95 {"static", 4}, 96 {"struct", 3}, 97 {"switch", 7}, 98 {"typedef", 4}, 99 {"union", 3}, 100 {"unsigned", 4}, 101 {"void", 4}, 102 {"volatile", 4}, 103 {"while", 5} 104 }; 105 106 const char **typenames; 107 int typename_count; 108 int typename_top = -1; 109 110 char chartype[128] = 111 { /* this is used to facilitate the decision of 112 * what type (alphanumeric, operator) each 113 * character is */ 114 0, 0, 0, 0, 0, 0, 0, 0, 115 0, 0, 0, 0, 0, 0, 0, 0, 116 0, 0, 0, 0, 0, 0, 0, 0, 117 0, 0, 0, 0, 0, 0, 0, 0, 118 0, 3, 0, 0, 1, 3, 3, 0, 119 0, 0, 3, 3, 0, 3, 0, 3, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 0, 0, 3, 3, 3, 3, 122 0, 1, 1, 1, 1, 1, 1, 1, 123 1, 1, 1, 1, 1, 1, 1, 1, 124 1, 1, 1, 1, 1, 1, 1, 1, 125 1, 1, 1, 0, 0, 0, 3, 1, 126 0, 1, 1, 1, 1, 1, 1, 1, 127 1, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 1, 1, 1, 1, 1, 129 1, 1, 1, 0, 3, 0, 3, 0 130 }; 131 132 static int 133 strcmp_type(const void *e1, const void *e2) 134 { 135 return (strcmp(e1, *(const char * const *)e2)); 136 } 137 138 int 139 lexi(void) 140 { 141 int unary_delim; /* this is set to 1 if the current token 142 * forces a following operator to be unary */ 143 static int last_code; /* the last token type returned */ 144 static int l_struct; /* set to 1 if the last token was 'struct' */ 145 int code; /* internal code to be returned */ 146 char qchar; /* the delimiter character for a string */ 147 148 e_token = s_token; /* point to start of place to save token */ 149 unary_delim = false; 150 ps.col_1 = ps.last_nl; /* tell world that this token started in 151 * column 1 iff the last thing scanned was nl */ 152 ps.last_nl = false; 153 154 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 155 ps.col_1 = false; /* leading blanks imply token is not in column 156 * 1 */ 157 if (++buf_ptr >= buf_end) 158 fill_buffer(); 159 } 160 161 /* Scan an alphanumeric token */ 162 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 163 /* 164 * we have a character or number 165 */ 166 struct templ *p; 167 168 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 169 int seendot = 0, 170 seenexp = 0, 171 seensfx = 0; 172 if (*buf_ptr == '0' && 173 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 174 *e_token++ = *buf_ptr++; 175 *e_token++ = *buf_ptr++; 176 while (isxdigit(*buf_ptr)) { 177 CHECK_SIZE_TOKEN; 178 *e_token++ = *buf_ptr++; 179 } 180 } 181 else 182 while (1) { 183 if (*buf_ptr == '.') { 184 if (seendot) 185 break; 186 else 187 seendot++; 188 } 189 CHECK_SIZE_TOKEN; 190 *e_token++ = *buf_ptr++; 191 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 192 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 193 break; 194 else { 195 seenexp++; 196 seendot++; 197 CHECK_SIZE_TOKEN; 198 *e_token++ = *buf_ptr++; 199 if (*buf_ptr == '+' || *buf_ptr == '-') 200 *e_token++ = *buf_ptr++; 201 } 202 } 203 } 204 while (1) { 205 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { 206 CHECK_SIZE_TOKEN; 207 *e_token++ = *buf_ptr++; 208 seensfx |= 1; 209 continue; 210 } 211 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { 212 CHECK_SIZE_TOKEN; 213 if (buf_ptr[1] == buf_ptr[0]) 214 *e_token++ = *buf_ptr++; 215 *e_token++ = *buf_ptr++; 216 seensfx |= 2; 217 continue; 218 } 219 break; 220 } 221 } 222 else 223 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 224 /* fill_buffer() terminates buffer with newline */ 225 if (*buf_ptr == BACKSLASH) { 226 if (*(buf_ptr + 1) == '\n') { 227 buf_ptr += 2; 228 if (buf_ptr >= buf_end) 229 fill_buffer(); 230 } else 231 break; 232 } 233 CHECK_SIZE_TOKEN; 234 /* copy it over */ 235 *e_token++ = *buf_ptr++; 236 if (buf_ptr >= buf_end) 237 fill_buffer(); 238 } 239 *e_token++ = '\0'; 240 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 241 if (++buf_ptr >= buf_end) 242 fill_buffer(); 243 } 244 ps.keyword = 0; 245 if (l_struct && !ps.p_l_follow) { 246 /* if last token was 'struct' and we're not 247 * in parentheses, then this token 248 * should be treated as a declaration */ 249 l_struct = false; 250 last_code = ident; 251 ps.last_u_d = true; 252 return (decl); 253 } 254 ps.last_u_d = l_struct; /* Operator after identifier is binary 255 * unless last token was 'struct' */ 256 l_struct = false; 257 last_code = ident; /* Remember that this is the code we will 258 * return */ 259 260 p = bsearch(s_token, 261 specials, 262 sizeof(specials) / sizeof(specials[0]), 263 sizeof(specials[0]), 264 strcmp_type); 265 if (p == NULL) { /* not a special keyword... */ 266 char *u; 267 268 /* ... so maybe a type_t or a typedef */ 269 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && 270 strcmp(u, "_t") == 0) || (typename_top >= 0 && 271 bsearch(s_token, typenames, typename_top + 1, 272 sizeof(typenames[0]), strcmp_type))) { 273 ps.keyword = 4; /* a type name */ 274 ps.last_u_d = true; 275 goto found_typename; 276 } 277 } else { /* we have a keyword */ 278 ps.keyword = p->rwcode; 279 ps.last_u_d = true; 280 switch (p->rwcode) { 281 case 7: /* it is a switch */ 282 return (swstmt); 283 case 8: /* a case or default */ 284 return (casestmt); 285 286 case 3: /* a "struct" */ 287 /* 288 * Next time around, we will want to know that we have had a 289 * 'struct' 290 */ 291 l_struct = true; 292 /* FALLTHROUGH */ 293 294 case 4: /* one of the declaration keywords */ 295 found_typename: 296 if (ps.p_l_follow) { 297 /* inside parens: cast, param list, offsetof or sizeof */ 298 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; 299 break; 300 } 301 last_code = decl; 302 return (decl); 303 304 case 5: /* if, while, for */ 305 return (sp_paren); 306 307 case 6: /* do, else */ 308 return (sp_nparen); 309 310 default: /* all others are treated like any other 311 * identifier */ 312 return (ident); 313 } /* end of switch */ 314 } /* end of if (found_it) */ 315 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 316 char *tp = buf_ptr; 317 while (tp < buf_end) 318 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 319 goto not_proc; 320 strncpy(ps.procname, token, sizeof ps.procname - 1); 321 ps.in_parameter_declaration = 1; 322 rparen_count = 1; 323 not_proc:; 324 } 325 /* 326 * The following hack attempts to guess whether or not the current 327 * token is in fact a declaration keyword -- one that has been 328 * typedefd 329 */ 330 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 331 && !ps.p_l_follow 332 && !ps.block_init 333 && (ps.last_token == rparen || ps.last_token == semicolon || 334 ps.last_token == decl || 335 ps.last_token == lbrace || ps.last_token == rbrace)) { 336 ps.keyword = 4; /* a type name */ 337 ps.last_u_d = true; 338 last_code = decl; 339 return decl; 340 } 341 if (last_code == decl) /* if this is a declared variable, then 342 * following sign is unary */ 343 ps.last_u_d = true; /* will make "int a -1" work */ 344 last_code = ident; 345 return (ident); /* the ident is not in the list */ 346 } /* end of procesing for alpanum character */ 347 348 /* Scan a non-alphanumeric token */ 349 350 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 351 * moved here */ 352 *e_token = '\0'; 353 if (++buf_ptr >= buf_end) 354 fill_buffer(); 355 356 switch (*token) { 357 case '\n': 358 unary_delim = ps.last_u_d; 359 ps.last_nl = true; /* remember that we just had a newline */ 360 code = (had_eof ? 0 : newline); 361 362 /* 363 * if data has been exhausted, the newline is a dummy, and we should 364 * return code to stop 365 */ 366 break; 367 368 case '\'': /* start of quoted character */ 369 case '"': /* start of string */ 370 qchar = *token; 371 if (troff) { 372 e_token[-1] = '`'; 373 if (qchar == '"') 374 *e_token++ = '`'; 375 e_token = chfont(&bodyf, &stringf, e_token); 376 } 377 do { /* copy the string */ 378 while (1) { /* move one character or [/<char>]<char> */ 379 if (*buf_ptr == '\n') { 380 diag2(1, "Unterminated literal"); 381 goto stop_lit; 382 } 383 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 384 * since CHECK_SIZE guarantees that there 385 * are at least 5 entries left */ 386 *e_token = *buf_ptr++; 387 if (buf_ptr >= buf_end) 388 fill_buffer(); 389 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 390 if (*buf_ptr == '\n') /* check for escaped newline */ 391 ++line_no; 392 if (troff) { 393 *++e_token = BACKSLASH; 394 if (*buf_ptr == BACKSLASH) 395 *++e_token = BACKSLASH; 396 } 397 *++e_token = *buf_ptr++; 398 ++e_token; /* we must increment this again because we 399 * copied two chars */ 400 if (buf_ptr >= buf_end) 401 fill_buffer(); 402 } 403 else 404 break; /* we copied one character */ 405 } /* end of while (1) */ 406 } while (*e_token++ != qchar); 407 if (troff) { 408 e_token = chfont(&stringf, &bodyf, e_token - 1); 409 if (qchar == '"') 410 *e_token++ = '\''; 411 } 412 stop_lit: 413 code = ident; 414 break; 415 416 case ('('): 417 case ('['): 418 unary_delim = true; 419 code = lparen; 420 break; 421 422 case (')'): 423 case (']'): 424 code = rparen; 425 break; 426 427 case '#': 428 unary_delim = ps.last_u_d; 429 code = preesc; 430 break; 431 432 case '?': 433 unary_delim = true; 434 code = question; 435 break; 436 437 case (':'): 438 code = colon; 439 unary_delim = true; 440 break; 441 442 case (';'): 443 unary_delim = true; 444 code = semicolon; 445 break; 446 447 case ('{'): 448 unary_delim = true; 449 450 /* 451 * if (ps.in_or_st) ps.block_init = 1; 452 */ 453 /* ? code = ps.block_init ? lparen : lbrace; */ 454 code = lbrace; 455 break; 456 457 case ('}'): 458 unary_delim = true; 459 /* ? code = ps.block_init ? rparen : rbrace; */ 460 code = rbrace; 461 break; 462 463 case 014: /* a form feed */ 464 unary_delim = ps.last_u_d; 465 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 466 * right */ 467 code = form_feed; 468 break; 469 470 case (','): 471 unary_delim = true; 472 code = comma; 473 break; 474 475 case '.': 476 unary_delim = false; 477 code = period; 478 break; 479 480 case '-': 481 case '+': /* check for -, +, --, ++ */ 482 code = (ps.last_u_d ? unary_op : binary_op); 483 unary_delim = true; 484 485 if (*buf_ptr == token[0]) { 486 /* check for doubled character */ 487 *e_token++ = *buf_ptr++; 488 /* buffer overflow will be checked at end of loop */ 489 if (last_code == ident || last_code == rparen) { 490 code = (ps.last_u_d ? unary_op : postop); 491 /* check for following ++ or -- */ 492 unary_delim = false; 493 } 494 } 495 else if (*buf_ptr == '=') 496 /* check for operator += */ 497 *e_token++ = *buf_ptr++; 498 else if (*buf_ptr == '>') { 499 /* check for operator -> */ 500 *e_token++ = *buf_ptr++; 501 if (!pointer_as_binop) { 502 unary_delim = false; 503 code = unary_op; 504 ps.want_blank = false; 505 } 506 } 507 break; /* buffer overflow will be checked at end of 508 * switch */ 509 510 case '=': 511 if (ps.in_or_st) 512 ps.block_init = 1; 513 #ifdef undef 514 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 515 e_token[-1] = *buf_ptr++; 516 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 517 *e_token++ = *buf_ptr++; 518 *e_token++ = '='; /* Flip =+ to += */ 519 *e_token = 0; 520 } 521 #else 522 if (*buf_ptr == '=') {/* == */ 523 *e_token++ = '='; /* Flip =+ to += */ 524 buf_ptr++; 525 *e_token = 0; 526 } 527 #endif 528 code = binary_op; 529 unary_delim = true; 530 break; 531 /* can drop thru!!! */ 532 533 case '>': 534 case '<': 535 case '!': /* ops like <, <<, <=, !=, etc */ 536 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 537 *e_token++ = *buf_ptr; 538 if (++buf_ptr >= buf_end) 539 fill_buffer(); 540 } 541 if (*buf_ptr == '=') 542 *e_token++ = *buf_ptr++; 543 code = (ps.last_u_d ? unary_op : binary_op); 544 unary_delim = true; 545 break; 546 547 default: 548 if (token[0] == '/' && *buf_ptr == '*') { 549 /* it is start of comment */ 550 *e_token++ = '*'; 551 552 if (++buf_ptr >= buf_end) 553 fill_buffer(); 554 555 code = comment; 556 unary_delim = ps.last_u_d; 557 break; 558 } 559 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 560 /* 561 * handle ||, &&, etc, and also things as in int *****i 562 */ 563 *e_token++ = *buf_ptr; 564 if (++buf_ptr >= buf_end) 565 fill_buffer(); 566 } 567 code = (ps.last_u_d ? unary_op : binary_op); 568 unary_delim = true; 569 570 571 } /* end of switch */ 572 if (code != newline) { 573 l_struct = false; 574 last_code = code; 575 } 576 if (buf_ptr >= buf_end) /* check for input buffer empty */ 577 fill_buffer(); 578 ps.last_u_d = unary_delim; 579 *e_token = '\0'; /* null terminate the token */ 580 return (code); 581 } 582 583 void 584 alloc_typenames(void) 585 { 586 587 typenames = (const char **)malloc(sizeof(typenames[0]) * 588 (typename_count = 16)); 589 if (typenames == NULL) 590 err(1, NULL); 591 } 592 593 void 594 add_typename(const char *key) 595 { 596 int comparison; 597 const char *copy; 598 599 if (typename_top + 1 >= typename_count) { 600 typenames = realloc((void *)typenames, 601 sizeof(typenames[0]) * (typename_count *= 2)); 602 if (typenames == NULL) 603 err(1, NULL); 604 } 605 if (typename_top == -1) 606 typenames[++typename_top] = copy = strdup(key); 607 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { 608 /* take advantage of sorted input */ 609 if (comparison == 0) /* remove duplicates */ 610 return; 611 typenames[++typename_top] = copy = strdup(key); 612 } 613 else { 614 int p; 615 616 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) 617 /* find place for the new key */; 618 if (comparison == 0) /* remove duplicates */ 619 return; 620 memmove(&typenames[p + 1], &typenames[p], 621 sizeof(typenames[0]) * (++typename_top - p)); 622 typenames[p] = copy = strdup(key); 623 } 624 625 if (copy == NULL) 626 err(1, NULL); 627 } 628