1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <err.h> 51 #include <stdio.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include "indent_globs.h" 56 #include "indent_codes.h" 57 #include "indent.h" 58 59 #define alphanum 1 60 #define opchar 3 61 62 struct templ { 63 const char *rwd; 64 int rwcode; 65 }; 66 67 struct templ specials[1000] = 68 { 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"const", 4}, 90 {"volatile", 4}, 91 {"goto", 0}, 92 {"return", 0}, 93 {"if", 5}, 94 {"while", 5}, 95 {"for", 5}, 96 {"else", 6}, 97 {"do", 6}, 98 {"sizeof", 7}, 99 {0, 0} 100 }; 101 102 char chartype[128] = 103 { /* this is used to facilitate the decision of 104 * what type (alphanumeric, operator) each 105 * character is */ 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0, 0, 0, 0, 0, 110 0, 3, 0, 0, 1, 3, 3, 0, 111 0, 0, 3, 3, 0, 3, 0, 3, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 0, 0, 3, 3, 3, 3, 114 0, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 0, 0, 0, 3, 1, 118 0, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 0, 3, 0, 3, 0 122 }; 123 124 int 125 lexi(void) 126 { 127 int unary_delim; /* this is set to 1 if the current token 128 * forces a following operator to be unary */ 129 static int last_code; /* the last token type returned */ 130 static int l_struct; /* set to 1 if the last token was 'struct' */ 131 int code; /* internal code to be returned */ 132 char qchar; /* the delimiter character for a string */ 133 134 e_token = s_token; /* point to start of place to save token */ 135 unary_delim = false; 136 ps.col_1 = ps.last_nl; /* tell world that this token started in 137 * column 1 iff the last thing scanned was nl */ 138 ps.last_nl = false; 139 140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 141 ps.col_1 = false; /* leading blanks imply token is not in column 142 * 1 */ 143 if (++buf_ptr >= buf_end) 144 fill_buffer(); 145 } 146 147 /* Scan an alphanumeric token */ 148 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 149 /* 150 * we have a character or number 151 */ 152 const char *j; /* used for searching thru list of 153 * 154 * reserved words */ 155 struct templ *p; 156 157 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 158 int seendot = 0, 159 seenexp = 0, 160 seensfx = 0; 161 if (*buf_ptr == '0' && 162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 163 *e_token++ = *buf_ptr++; 164 *e_token++ = *buf_ptr++; 165 while (isxdigit(*buf_ptr)) { 166 CHECK_SIZE_TOKEN; 167 *e_token++ = *buf_ptr++; 168 } 169 } 170 else 171 while (1) { 172 if (*buf_ptr == '.') { 173 if (seendot) 174 break; 175 else 176 seendot++; 177 } 178 CHECK_SIZE_TOKEN; 179 *e_token++ = *buf_ptr++; 180 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 181 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 182 break; 183 else { 184 seenexp++; 185 seendot++; 186 CHECK_SIZE_TOKEN; 187 *e_token++ = *buf_ptr++; 188 if (*buf_ptr == '+' || *buf_ptr == '-') 189 *e_token++ = *buf_ptr++; 190 } 191 } 192 } 193 while (1) { 194 if (!(seensfx & 1) && 195 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 196 CHECK_SIZE_TOKEN; 197 *e_token++ = *buf_ptr++; 198 seensfx |= 1; 199 continue; 200 } 201 if (!(seensfx & 2) && 202 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 203 CHECK_SIZE_TOKEN; 204 if (buf_ptr[1] == buf_ptr[0]) 205 *e_token++ = *buf_ptr++; 206 *e_token++ = *buf_ptr++; 207 seensfx |= 2; 208 continue; 209 } 210 break; 211 } 212 } 213 else 214 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 215 /* fill_buffer() terminates buffer with newline */ 216 if (*buf_ptr == BACKSLASH) { 217 if (*(buf_ptr + 1) == '\n') { 218 buf_ptr += 2; 219 if (buf_ptr >= buf_end) 220 fill_buffer(); 221 } else 222 break; 223 } 224 CHECK_SIZE_TOKEN; 225 /* copy it over */ 226 *e_token++ = *buf_ptr++; 227 if (buf_ptr >= buf_end) 228 fill_buffer(); 229 } 230 *e_token++ = '\0'; 231 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 232 if (++buf_ptr >= buf_end) 233 fill_buffer(); 234 } 235 ps.its_a_keyword = false; 236 ps.sizeof_keyword = false; 237 if (l_struct && !ps.p_l_follow) { 238 /* if last token was 'struct' and we're not 239 * in parentheses, then this token 240 * should be treated as a declaration */ 241 l_struct = false; 242 last_code = ident; 243 ps.last_u_d = true; 244 return (decl); 245 } 246 ps.last_u_d = l_struct; /* Operator after identifier is binary 247 * unless last token was 'struct' */ 248 l_struct = false; 249 last_code = ident; /* Remember that this is the code we will 250 * return */ 251 252 if (auto_typedefs) { 253 const char *q = s_token; 254 size_t q_len = strlen(q); 255 /* Check if we have an "_t" in the end */ 256 if (q_len > 2 && 257 (strcmp(q + q_len - 2, "_t") == 0)) { 258 ps.its_a_keyword = true; 259 ps.last_u_d = true; 260 goto found_auto_typedef; 261 } 262 } 263 264 /* 265 * This loop will check if the token is a keyword. 266 */ 267 for (p = specials; (j = p->rwd) != 0; p++) { 268 const char *q = s_token; /* point at scanned token */ 269 if (*j++ != *q++ || *j++ != *q++) 270 continue; /* This test depends on the fact that 271 * identifiers are always at least 1 character 272 * long (ie. the first two bytes of the 273 * identifier are always meaningful) */ 274 if (q[-1] == 0) 275 break; /* If its a one-character identifier */ 276 while (*q++ == *j) 277 if (*j++ == 0) 278 goto found_keyword; /* I wish that C had a multi-level 279 * break... */ 280 } 281 if (p->rwd) { /* we have a keyword */ 282 found_keyword: 283 ps.its_a_keyword = true; 284 ps.last_u_d = true; 285 switch (p->rwcode) { 286 case 1: /* it is a switch */ 287 return (swstmt); 288 case 2: /* a case or default */ 289 return (casestmt); 290 291 case 3: /* a "struct" */ 292 /* 293 * Next time around, we will want to know that we have had a 294 * 'struct' 295 */ 296 l_struct = true; 297 /* FALLTHROUGH */ 298 299 case 4: /* one of the declaration keywords */ 300 found_auto_typedef: 301 if (ps.p_l_follow) { 302 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; 303 break; /* inside parens: cast, param list or sizeof */ 304 } 305 last_code = decl; 306 return (decl); 307 308 case 5: /* if, while, for */ 309 return (sp_paren); 310 311 case 6: /* do, else */ 312 return (sp_nparen); 313 314 case 7: 315 ps.sizeof_keyword = true; 316 default: /* all others are treated like any other 317 * identifier */ 318 return (ident); 319 } /* end of switch */ 320 } /* end of if (found_it) */ 321 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 322 char *tp = buf_ptr; 323 while (tp < buf_end) 324 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 325 goto not_proc; 326 strncpy(ps.procname, token, sizeof ps.procname - 1); 327 ps.in_parameter_declaration = 1; 328 rparen_count = 1; 329 not_proc:; 330 } 331 /* 332 * The following hack attempts to guess whether or not the current 333 * token is in fact a declaration keyword -- one that has been 334 * typedefd 335 */ 336 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 337 && !ps.p_l_follow 338 && !ps.block_init 339 && (ps.last_token == rparen || ps.last_token == semicolon || 340 ps.last_token == decl || 341 ps.last_token == lbrace || ps.last_token == rbrace)) { 342 ps.its_a_keyword = true; 343 ps.last_u_d = true; 344 last_code = decl; 345 return decl; 346 } 347 if (last_code == decl) /* if this is a declared variable, then 348 * following sign is unary */ 349 ps.last_u_d = true; /* will make "int a -1" work */ 350 last_code = ident; 351 return (ident); /* the ident is not in the list */ 352 } /* end of procesing for alpanum character */ 353 354 /* Scan a non-alphanumeric token */ 355 356 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 357 * moved here */ 358 *e_token = '\0'; 359 if (++buf_ptr >= buf_end) 360 fill_buffer(); 361 362 switch (*token) { 363 case '\n': 364 unary_delim = ps.last_u_d; 365 ps.last_nl = true; /* remember that we just had a newline */ 366 code = (had_eof ? 0 : newline); 367 368 /* 369 * if data has been exhausted, the newline is a dummy, and we should 370 * return code to stop 371 */ 372 break; 373 374 case '\'': /* start of quoted character */ 375 case '"': /* start of string */ 376 qchar = *token; 377 if (troff) { 378 e_token[-1] = '`'; 379 if (qchar == '"') 380 *e_token++ = '`'; 381 e_token = chfont(&bodyf, &stringf, e_token); 382 } 383 do { /* copy the string */ 384 while (1) { /* move one character or [/<char>]<char> */ 385 if (*buf_ptr == '\n') { 386 diag2(1, "Unterminated literal"); 387 goto stop_lit; 388 } 389 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 390 * since CHECK_SIZE guarantees that there 391 * are at least 5 entries left */ 392 *e_token = *buf_ptr++; 393 if (buf_ptr >= buf_end) 394 fill_buffer(); 395 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 396 if (*buf_ptr == '\n') /* check for escaped newline */ 397 ++line_no; 398 if (troff) { 399 *++e_token = BACKSLASH; 400 if (*buf_ptr == BACKSLASH) 401 *++e_token = BACKSLASH; 402 } 403 *++e_token = *buf_ptr++; 404 ++e_token; /* we must increment this again because we 405 * copied two chars */ 406 if (buf_ptr >= buf_end) 407 fill_buffer(); 408 } 409 else 410 break; /* we copied one character */ 411 } /* end of while (1) */ 412 } while (*e_token++ != qchar); 413 if (troff) { 414 e_token = chfont(&stringf, &bodyf, e_token - 1); 415 if (qchar == '"') 416 *e_token++ = '\''; 417 } 418 stop_lit: 419 code = ident; 420 break; 421 422 case ('('): 423 case ('['): 424 unary_delim = true; 425 code = lparen; 426 break; 427 428 case (')'): 429 case (']'): 430 code = rparen; 431 break; 432 433 case '#': 434 unary_delim = ps.last_u_d; 435 code = preesc; 436 break; 437 438 case '?': 439 unary_delim = true; 440 code = question; 441 break; 442 443 case (':'): 444 code = colon; 445 unary_delim = true; 446 break; 447 448 case (';'): 449 unary_delim = true; 450 code = semicolon; 451 break; 452 453 case ('{'): 454 unary_delim = true; 455 456 /* 457 * if (ps.in_or_st) ps.block_init = 1; 458 */ 459 /* ? code = ps.block_init ? lparen : lbrace; */ 460 code = lbrace; 461 break; 462 463 case ('}'): 464 unary_delim = true; 465 /* ? code = ps.block_init ? rparen : rbrace; */ 466 code = rbrace; 467 break; 468 469 case 014: /* a form feed */ 470 unary_delim = ps.last_u_d; 471 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 472 * right */ 473 code = form_feed; 474 break; 475 476 case (','): 477 unary_delim = true; 478 code = comma; 479 break; 480 481 case '.': 482 unary_delim = false; 483 code = period; 484 break; 485 486 case '-': 487 case '+': /* check for -, +, --, ++ */ 488 code = (ps.last_u_d ? unary_op : binary_op); 489 unary_delim = true; 490 491 if (*buf_ptr == token[0]) { 492 /* check for doubled character */ 493 *e_token++ = *buf_ptr++; 494 /* buffer overflow will be checked at end of loop */ 495 if (last_code == ident || last_code == rparen) { 496 code = (ps.last_u_d ? unary_op : postop); 497 /* check for following ++ or -- */ 498 unary_delim = false; 499 } 500 } 501 else if (*buf_ptr == '=') 502 /* check for operator += */ 503 *e_token++ = *buf_ptr++; 504 else if (*buf_ptr == '>') { 505 /* check for operator -> */ 506 *e_token++ = *buf_ptr++; 507 if (!pointer_as_binop) { 508 unary_delim = false; 509 code = unary_op; 510 ps.want_blank = false; 511 } 512 } 513 break; /* buffer overflow will be checked at end of 514 * switch */ 515 516 case '=': 517 if (ps.in_or_st) 518 ps.block_init = 1; 519 #ifdef undef 520 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 521 e_token[-1] = *buf_ptr++; 522 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 523 *e_token++ = *buf_ptr++; 524 *e_token++ = '='; /* Flip =+ to += */ 525 *e_token = 0; 526 } 527 #else 528 if (*buf_ptr == '=') {/* == */ 529 *e_token++ = '='; /* Flip =+ to += */ 530 buf_ptr++; 531 *e_token = 0; 532 } 533 #endif 534 code = binary_op; 535 unary_delim = true; 536 break; 537 /* can drop thru!!! */ 538 539 case '>': 540 case '<': 541 case '!': /* ops like <, <<, <=, !=, etc */ 542 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 543 *e_token++ = *buf_ptr; 544 if (++buf_ptr >= buf_end) 545 fill_buffer(); 546 } 547 if (*buf_ptr == '=') 548 *e_token++ = *buf_ptr++; 549 code = (ps.last_u_d ? unary_op : binary_op); 550 unary_delim = true; 551 break; 552 553 default: 554 if (token[0] == '/' && *buf_ptr == '*') { 555 /* it is start of comment */ 556 *e_token++ = '*'; 557 558 if (++buf_ptr >= buf_end) 559 fill_buffer(); 560 561 code = comment; 562 unary_delim = ps.last_u_d; 563 break; 564 } 565 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 566 /* 567 * handle ||, &&, etc, and also things as in int *****i 568 */ 569 *e_token++ = *buf_ptr; 570 if (++buf_ptr >= buf_end) 571 fill_buffer(); 572 } 573 code = (ps.last_u_d ? unary_op : binary_op); 574 unary_delim = true; 575 576 577 } /* end of switch */ 578 if (code != newline) { 579 l_struct = false; 580 last_code = code; 581 } 582 if (buf_ptr >= buf_end) /* check for input buffer empty */ 583 fill_buffer(); 584 ps.last_u_d = unary_delim; 585 *e_token = '\0'; /* null terminate the token */ 586 return (code); 587 } 588 589 /* 590 * Add the given keyword to the keyword table, using val as the keyword type 591 */ 592 void 593 addkey(char *key, int val) 594 { 595 struct templ *p = specials; 596 while (p->rwd) 597 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 598 return; 599 else 600 p++; 601 if (p >= specials + sizeof specials / sizeof specials[0]) 602 return; /* For now, table overflows are silently 603 * ignored */ 604 p->rwd = key; 605 p->rwcode = val; 606 p[1].rwd = 0; 607 p[1].rwcode = 0; 608 } 609