1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <err.h> 51 #include <stdio.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include "indent_globs.h" 56 #include "indent_codes.h" 57 #include "indent.h" 58 59 #define alphanum 1 60 #define opchar 3 61 62 struct templ { 63 const char *rwd; 64 int rwcode; 65 }; 66 67 struct templ specials[1000] = 68 { 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"goto", 0}, 90 {"return", 0}, 91 {"if", 5}, 92 {"while", 5}, 93 {"for", 5}, 94 {"else", 6}, 95 {"do", 6}, 96 {"sizeof", 7}, 97 {"const", 9}, 98 {"volatile", 9}, 99 {0, 0} 100 }; 101 102 char chartype[128] = 103 { /* this is used to facilitate the decision of 104 * what type (alphanumeric, operator) each 105 * character is */ 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0, 0, 0, 0, 0, 110 0, 3, 0, 0, 1, 3, 3, 0, 111 0, 0, 3, 3, 0, 3, 0, 3, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 0, 0, 3, 3, 3, 3, 114 0, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 0, 0, 0, 3, 1, 118 0, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 0, 3, 0, 3, 0 122 }; 123 124 int 125 lexi(void) 126 { 127 int unary_delim; /* this is set to 1 if the current token 128 * forces a following operator to be unary */ 129 static int last_code; /* the last token type returned */ 130 static int l_struct; /* set to 1 if the last token was 'struct' */ 131 int code; /* internal code to be returned */ 132 char qchar; /* the delimiter character for a string */ 133 134 e_token = s_token; /* point to start of place to save token */ 135 unary_delim = false; 136 ps.col_1 = ps.last_nl; /* tell world that this token started in 137 * column 1 iff the last thing scanned was nl */ 138 ps.last_nl = false; 139 140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 141 ps.col_1 = false; /* leading blanks imply token is not in column 142 * 1 */ 143 if (++buf_ptr >= buf_end) 144 fill_buffer(); 145 } 146 147 /* Scan an alphanumeric token */ 148 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 149 /* 150 * we have a character or number 151 */ 152 const char *j; /* used for searching thru list of 153 * 154 * reserved words */ 155 struct templ *p; 156 157 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 158 int seendot = 0, 159 seenexp = 0, 160 seensfx = 0; 161 if (*buf_ptr == '0' && 162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 163 *e_token++ = *buf_ptr++; 164 *e_token++ = *buf_ptr++; 165 while (isxdigit(*buf_ptr)) { 166 CHECK_SIZE_TOKEN; 167 *e_token++ = *buf_ptr++; 168 } 169 } 170 else 171 while (1) { 172 if (*buf_ptr == '.') { 173 if (seendot) 174 break; 175 else 176 seendot++; 177 } 178 CHECK_SIZE_TOKEN; 179 *e_token++ = *buf_ptr++; 180 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 181 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 182 break; 183 else { 184 seenexp++; 185 seendot++; 186 CHECK_SIZE_TOKEN; 187 *e_token++ = *buf_ptr++; 188 if (*buf_ptr == '+' || *buf_ptr == '-') 189 *e_token++ = *buf_ptr++; 190 } 191 } 192 } 193 while (1) { 194 if (!(seensfx & 1) && 195 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 196 CHECK_SIZE_TOKEN; 197 *e_token++ = *buf_ptr++; 198 seensfx |= 1; 199 continue; 200 } 201 if (!(seensfx & 2) && 202 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 203 CHECK_SIZE_TOKEN; 204 if (buf_ptr[1] == buf_ptr[0]) 205 *e_token++ = *buf_ptr++; 206 *e_token++ = *buf_ptr++; 207 seensfx |= 2; 208 continue; 209 } 210 break; 211 } 212 } 213 else 214 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 215 /* fill_buffer() terminates buffer with newline */ 216 if (*buf_ptr == BACKSLASH) { 217 if (*(buf_ptr + 1) == '\n') { 218 buf_ptr += 2; 219 if (buf_ptr >= buf_end) 220 fill_buffer(); 221 } else 222 break; 223 } 224 CHECK_SIZE_TOKEN; 225 /* copy it over */ 226 *e_token++ = *buf_ptr++; 227 if (buf_ptr >= buf_end) 228 fill_buffer(); 229 } 230 *e_token++ = '\0'; 231 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 232 if (++buf_ptr >= buf_end) 233 fill_buffer(); 234 } 235 ps.its_a_keyword = false; 236 ps.sizeof_keyword = false; 237 if (l_struct) { /* if last token was 'struct', then this token 238 * should be treated as a declaration */ 239 l_struct = false; 240 last_code = ident; 241 ps.last_u_d = true; 242 return (decl); 243 } 244 ps.last_u_d = false; /* Operator after identifier is binary */ 245 last_code = ident; /* Remember that this is the code we will 246 * return */ 247 248 /* 249 * This loop will check if the token is a keyword. 250 */ 251 for (p = specials; (j = p->rwd) != 0; p++) { 252 const char *q = s_token; /* point at scanned token */ 253 if (*j++ != *q++ || *j++ != *q++) 254 continue; /* This test depends on the fact that 255 * identifiers are always at least 1 character 256 * long (ie. the first two bytes of the 257 * identifier are always meaningful) */ 258 if (q[-1] == 0) 259 break; /* If its a one-character identifier */ 260 while (*q++ == *j) 261 if (*j++ == 0) 262 goto found_keyword; /* I wish that C had a multi-level 263 * break... */ 264 } 265 if (p->rwd) { /* we have a keyword */ 266 found_keyword: 267 ps.its_a_keyword = true; 268 ps.last_u_d = true; 269 switch (p->rwcode) { 270 case 1: /* it is a switch */ 271 return (swstmt); 272 case 2: /* a case or default */ 273 return (casestmt); 274 275 case 3: /* a "struct" */ 276 if (ps.p_l_follow) 277 break; /* inside parens: cast */ 278 /* 279 * Next time around, we may want to know that we have had a 280 * 'struct' 281 */ 282 l_struct = true; 283 284 /* 285 * Fall through to test for a cast, function prototype or 286 * sizeof(). 287 */ 288 case 4: /* one of the declaration keywords */ 289 if (ps.p_l_follow) { 290 ps.cast_mask |= 1 << ps.p_l_follow; 291 292 /* 293 * Forget that we saw `struct' if we're in a sizeof(). 294 */ 295 if (ps.sizeof_mask) 296 l_struct = false; 297 298 break; /* inside parens: cast, prototype or sizeof() */ 299 } 300 last_code = decl; 301 return (decl); 302 303 case 5: /* if, while, for */ 304 return (sp_paren); 305 306 case 6: /* do, else */ 307 return (sp_nparen); 308 309 case 7: 310 ps.sizeof_keyword = true; 311 default: /* all others are treated like any other 312 * identifier */ 313 return (ident); 314 } /* end of switch */ 315 } /* end of if (found_it) */ 316 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 317 char *tp = buf_ptr; 318 while (tp < buf_end) 319 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 320 goto not_proc; 321 strncpy(ps.procname, token, sizeof ps.procname - 1); 322 ps.in_parameter_declaration = 1; 323 rparen_count = 1; 324 not_proc:; 325 } 326 /* 327 * The following hack attempts to guess whether or not the current 328 * token is in fact a declaration keyword -- one that has been 329 * typedefd 330 */ 331 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 332 && !ps.p_l_follow 333 && !ps.block_init 334 && (ps.last_token == rparen || ps.last_token == semicolon || 335 ps.last_token == decl || 336 ps.last_token == lbrace || ps.last_token == rbrace)) { 337 ps.its_a_keyword = true; 338 ps.last_u_d = true; 339 last_code = decl; 340 return decl; 341 } 342 if (last_code == decl) /* if this is a declared variable, then 343 * following sign is unary */ 344 ps.last_u_d = true; /* will make "int a -1" work */ 345 last_code = ident; 346 return (ident); /* the ident is not in the list */ 347 } /* end of procesing for alpanum character */ 348 349 /* Scan a non-alphanumeric token */ 350 351 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 352 * moved here */ 353 *e_token = '\0'; 354 if (++buf_ptr >= buf_end) 355 fill_buffer(); 356 357 switch (*token) { 358 case '\n': 359 unary_delim = ps.last_u_d; 360 ps.last_nl = true; /* remember that we just had a newline */ 361 code = (had_eof ? 0 : newline); 362 363 /* 364 * if data has been exhausted, the newline is a dummy, and we should 365 * return code to stop 366 */ 367 break; 368 369 case '\'': /* start of quoted character */ 370 case '"': /* start of string */ 371 qchar = *token; 372 if (troff) { 373 e_token[-1] = '`'; 374 if (qchar == '"') 375 *e_token++ = '`'; 376 e_token = chfont(&bodyf, &stringf, e_token); 377 } 378 do { /* copy the string */ 379 while (1) { /* move one character or [/<char>]<char> */ 380 if (*buf_ptr == '\n') { 381 printf("%d: Unterminated literal\n", line_no); 382 goto stop_lit; 383 } 384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 385 * since CHECK_SIZE guarantees that there 386 * are at least 5 entries left */ 387 *e_token = *buf_ptr++; 388 if (buf_ptr >= buf_end) 389 fill_buffer(); 390 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 391 if (*buf_ptr == '\n') /* check for escaped newline */ 392 ++line_no; 393 if (troff) { 394 *++e_token = BACKSLASH; 395 if (*buf_ptr == BACKSLASH) 396 *++e_token = BACKSLASH; 397 } 398 *++e_token = *buf_ptr++; 399 ++e_token; /* we must increment this again because we 400 * copied two chars */ 401 if (buf_ptr >= buf_end) 402 fill_buffer(); 403 } 404 else 405 break; /* we copied one character */ 406 } /* end of while (1) */ 407 } while (*e_token++ != qchar); 408 if (troff) { 409 e_token = chfont(&stringf, &bodyf, e_token - 1); 410 if (qchar == '"') 411 *e_token++ = '\''; 412 } 413 stop_lit: 414 code = ident; 415 break; 416 417 case ('('): 418 case ('['): 419 unary_delim = true; 420 code = lparen; 421 break; 422 423 case (')'): 424 case (']'): 425 code = rparen; 426 break; 427 428 case '#': 429 unary_delim = ps.last_u_d; 430 code = preesc; 431 break; 432 433 case '?': 434 unary_delim = true; 435 code = question; 436 break; 437 438 case (':'): 439 code = colon; 440 unary_delim = true; 441 break; 442 443 case (';'): 444 unary_delim = true; 445 code = semicolon; 446 break; 447 448 case ('{'): 449 unary_delim = true; 450 451 /* 452 * if (ps.in_or_st) ps.block_init = 1; 453 */ 454 /* ? code = ps.block_init ? lparen : lbrace; */ 455 code = lbrace; 456 break; 457 458 case ('}'): 459 unary_delim = true; 460 /* ? code = ps.block_init ? rparen : rbrace; */ 461 code = rbrace; 462 break; 463 464 case 014: /* a form feed */ 465 unary_delim = ps.last_u_d; 466 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 467 * right */ 468 code = form_feed; 469 break; 470 471 case (','): 472 unary_delim = true; 473 code = comma; 474 break; 475 476 case '.': 477 unary_delim = false; 478 code = period; 479 break; 480 481 case '-': 482 case '+': /* check for -, +, --, ++ */ 483 code = (ps.last_u_d ? unary_op : binary_op); 484 unary_delim = true; 485 486 if (*buf_ptr == token[0]) { 487 /* check for doubled character */ 488 *e_token++ = *buf_ptr++; 489 /* buffer overflow will be checked at end of loop */ 490 if (last_code == ident || last_code == rparen) { 491 code = (ps.last_u_d ? unary_op : postop); 492 /* check for following ++ or -- */ 493 unary_delim = false; 494 } 495 } 496 else if (*buf_ptr == '=') 497 /* check for operator += */ 498 *e_token++ = *buf_ptr++; 499 else if (*buf_ptr == '>') { 500 /* check for operator -> */ 501 *e_token++ = *buf_ptr++; 502 if (!pointer_as_binop) { 503 unary_delim = false; 504 code = unary_op; 505 ps.want_blank = false; 506 } 507 } 508 break; /* buffer overflow will be checked at end of 509 * switch */ 510 511 case '=': 512 if (ps.in_or_st) 513 ps.block_init = 1; 514 #ifdef undef 515 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 516 e_token[-1] = *buf_ptr++; 517 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 518 *e_token++ = *buf_ptr++; 519 *e_token++ = '='; /* Flip =+ to += */ 520 *e_token = 0; 521 } 522 #else 523 if (*buf_ptr == '=') {/* == */ 524 *e_token++ = '='; /* Flip =+ to += */ 525 buf_ptr++; 526 *e_token = 0; 527 } 528 #endif 529 code = binary_op; 530 unary_delim = true; 531 break; 532 /* can drop thru!!! */ 533 534 case '>': 535 case '<': 536 case '!': /* ops like <, <<, <=, !=, etc */ 537 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 538 *e_token++ = *buf_ptr; 539 if (++buf_ptr >= buf_end) 540 fill_buffer(); 541 } 542 if (*buf_ptr == '=') 543 *e_token++ = *buf_ptr++; 544 code = (ps.last_u_d ? unary_op : binary_op); 545 unary_delim = true; 546 break; 547 548 default: 549 if (token[0] == '/' && *buf_ptr == '*') { 550 /* it is start of comment */ 551 *e_token++ = '*'; 552 553 if (++buf_ptr >= buf_end) 554 fill_buffer(); 555 556 code = comment; 557 unary_delim = ps.last_u_d; 558 break; 559 } 560 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 561 /* 562 * handle ||, &&, etc, and also things as in int *****i 563 */ 564 *e_token++ = *buf_ptr; 565 if (++buf_ptr >= buf_end) 566 fill_buffer(); 567 } 568 code = (ps.last_u_d ? unary_op : binary_op); 569 unary_delim = true; 570 571 572 } /* end of switch */ 573 if (code != newline) { 574 l_struct = false; 575 last_code = code; 576 } 577 if (buf_ptr >= buf_end) /* check for input buffer empty */ 578 fill_buffer(); 579 ps.last_u_d = unary_delim; 580 *e_token = '\0'; /* null terminate the token */ 581 return (code); 582 } 583 584 /* 585 * Add the given keyword to the keyword table, using val as the keyword type 586 */ 587 void 588 addkey(char *key, int val) 589 { 590 struct templ *p = specials; 591 while (p->rwd) 592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 593 return; 594 else 595 p++; 596 if (p >= specials + sizeof specials / sizeof specials[0]) 597 return; /* For now, table overflows are silently 598 * ignored */ 599 p->rwd = key; 600 p->rwcode = val; 601 p[1].rwd = 0; 602 p[1].rwcode = 0; 603 } 604