1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if 0 37 #ifndef lint 38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39 #endif /* not lint */ 40 #endif 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 /* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50 #include <stdio.h> 51 #include <ctype.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include "indent_globs.h" 55 #include "indent_codes.h" 56 #include "indent.h" 57 58 #define alphanum 1 59 #define opchar 3 60 61 void fill_buffer(void); 62 63 struct templ { 64 const char *rwd; 65 int rwcode; 66 }; 67 68 struct templ specials[1000] = 69 { 70 {"switch", 1}, 71 {"case", 2}, 72 {"break", 0}, 73 {"struct", 3}, 74 {"union", 3}, 75 {"enum", 3}, 76 {"default", 2}, 77 {"int", 4}, 78 {"char", 4}, 79 {"float", 4}, 80 {"double", 4}, 81 {"long", 4}, 82 {"short", 4}, 83 {"typdef", 4}, 84 {"unsigned", 4}, 85 {"register", 4}, 86 {"static", 4}, 87 {"global", 4}, 88 {"extern", 4}, 89 {"void", 4}, 90 {"goto", 0}, 91 {"return", 0}, 92 {"if", 5}, 93 {"while", 5}, 94 {"for", 5}, 95 {"else", 6}, 96 {"do", 6}, 97 {"sizeof", 7}, 98 {"const", 9}, 99 {"volatile", 9}, 100 {0, 0} 101 }; 102 103 char chartype[128] = 104 { /* this is used to facilitate the decision of 105 * what type (alphanumeric, operator) each 106 * character is */ 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0, 0, 0, 0, 0, 110 0, 0, 0, 0, 0, 0, 0, 0, 111 0, 3, 0, 0, 1, 3, 3, 0, 112 0, 0, 3, 3, 0, 3, 0, 3, 113 1, 1, 1, 1, 1, 1, 1, 1, 114 1, 1, 0, 0, 3, 3, 3, 3, 115 0, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 1, 1, 1, 1, 1, 118 1, 1, 1, 0, 0, 0, 3, 1, 119 0, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 1, 1, 1, 1, 1, 122 1, 1, 1, 0, 3, 0, 3, 0 123 }; 124 125 int 126 lexi(void) 127 { 128 int unary_delim; /* this is set to 1 if the current token 129 * forces a following operator to be unary */ 130 static int last_code; /* the last token type returned */ 131 static int l_struct; /* set to 1 if the last token was 'struct' */ 132 int code; /* internal code to be returned */ 133 char qchar; /* the delimiter character for a string */ 134 135 e_token = s_token; /* point to start of place to save token */ 136 unary_delim = false; 137 ps.col_1 = ps.last_nl; /* tell world that this token started in 138 * column 1 iff the last thing scanned was nl */ 139 ps.last_nl = false; 140 141 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 142 ps.col_1 = false; /* leading blanks imply token is not in column 143 * 1 */ 144 if (++buf_ptr >= buf_end) 145 fill_buffer(); 146 } 147 148 /* Scan an alphanumeric token */ 149 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 150 /* 151 * we have a character or number 152 */ 153 const char *j; /* used for searching thru list of 154 * 155 * reserved words */ 156 struct templ *p; 157 158 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 159 int seendot = 0, 160 seenexp = 0, 161 seensfx = 0; 162 if (*buf_ptr == '0' && 163 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 164 *e_token++ = *buf_ptr++; 165 *e_token++ = *buf_ptr++; 166 while (isxdigit(*buf_ptr)) { 167 CHECK_SIZE_TOKEN; 168 *e_token++ = *buf_ptr++; 169 } 170 } 171 else 172 while (1) { 173 if (*buf_ptr == '.') { 174 if (seendot) 175 break; 176 else 177 seendot++; 178 } 179 CHECK_SIZE_TOKEN; 180 *e_token++ = *buf_ptr++; 181 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 182 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 183 break; 184 else { 185 seenexp++; 186 seendot++; 187 CHECK_SIZE_TOKEN; 188 *e_token++ = *buf_ptr++; 189 if (*buf_ptr == '+' || *buf_ptr == '-') 190 *e_token++ = *buf_ptr++; 191 } 192 } 193 } 194 while (1) { 195 if (!(seensfx & 1) && 196 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 197 CHECK_SIZE_TOKEN; 198 *e_token++ = *buf_ptr++; 199 seensfx |= 1; 200 continue; 201 } 202 if (!(seensfx & 2) && 203 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 204 CHECK_SIZE_TOKEN; 205 if (buf_ptr[1] == buf_ptr[0]) 206 *e_token++ = *buf_ptr++; 207 *e_token++ = *buf_ptr++; 208 seensfx |= 2; 209 continue; 210 } 211 break; 212 } 213 } 214 else 215 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 216 /* fill_buffer() terminates buffer with newline */ 217 if (*buf_ptr == BACKSLASH) { 218 if (*(buf_ptr + 1) == '\n') { 219 buf_ptr += 2; 220 if (buf_ptr >= buf_end) 221 fill_buffer(); 222 } else 223 break; 224 } 225 CHECK_SIZE_TOKEN; 226 /* copy it over */ 227 *e_token++ = *buf_ptr++; 228 if (buf_ptr >= buf_end) 229 fill_buffer(); 230 } 231 *e_token++ = '\0'; 232 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 233 if (++buf_ptr >= buf_end) 234 fill_buffer(); 235 } 236 ps.its_a_keyword = false; 237 ps.sizeof_keyword = false; 238 if (l_struct) { /* if last token was 'struct', then this token 239 * should be treated as a declaration */ 240 l_struct = false; 241 last_code = ident; 242 ps.last_u_d = true; 243 return (decl); 244 } 245 ps.last_u_d = false; /* Operator after identifier is binary */ 246 last_code = ident; /* Remember that this is the code we will 247 * return */ 248 249 /* 250 * This loop will check if the token is a keyword. 251 */ 252 for (p = specials; (j = p->rwd) != 0; p++) { 253 const char *q = s_token; /* point at scanned token */ 254 if (*j++ != *q++ || *j++ != *q++) 255 continue; /* This test depends on the fact that 256 * identifiers are always at least 1 character 257 * long (ie. the first two bytes of the 258 * identifier are always meaningful) */ 259 if (q[-1] == 0) 260 break; /* If its a one-character identifier */ 261 while (*q++ == *j) 262 if (*j++ == 0) 263 goto found_keyword; /* I wish that C had a multi-level 264 * break... */ 265 } 266 if (p->rwd) { /* we have a keyword */ 267 found_keyword: 268 ps.its_a_keyword = true; 269 ps.last_u_d = true; 270 switch (p->rwcode) { 271 case 1: /* it is a switch */ 272 return (swstmt); 273 case 2: /* a case or default */ 274 return (casestmt); 275 276 case 3: /* a "struct" */ 277 if (ps.p_l_follow) 278 break; /* inside parens: cast */ 279 /* 280 * Next time around, we may want to know that we have had a 281 * 'struct' 282 */ 283 l_struct = true; 284 285 /* 286 * Fall through to test for a cast, function prototype or 287 * sizeof(). 288 */ 289 case 4: /* one of the declaration keywords */ 290 if (ps.p_l_follow) { 291 ps.cast_mask |= 1 << ps.p_l_follow; 292 293 /* 294 * Forget that we saw `struct' if we're in a sizeof(). 295 */ 296 if (ps.sizeof_mask) 297 l_struct = false; 298 299 break; /* inside parens: cast, prototype or sizeof() */ 300 } 301 last_code = decl; 302 return (decl); 303 304 case 5: /* if, while, for */ 305 return (sp_paren); 306 307 case 6: /* do, else */ 308 return (sp_nparen); 309 310 case 7: 311 ps.sizeof_keyword = true; 312 default: /* all others are treated like any other 313 * identifier */ 314 return (ident); 315 } /* end of switch */ 316 } /* end of if (found_it) */ 317 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 318 char *tp = buf_ptr; 319 while (tp < buf_end) 320 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 321 goto not_proc; 322 strncpy(ps.procname, token, sizeof ps.procname - 1); 323 ps.in_parameter_declaration = 1; 324 rparen_count = 1; 325 not_proc:; 326 } 327 /* 328 * The following hack attempts to guess whether or not the current 329 * token is in fact a declaration keyword -- one that has been 330 * typedefd 331 */ 332 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 333 && !ps.p_l_follow 334 && !ps.block_init 335 && (ps.last_token == rparen || ps.last_token == semicolon || 336 ps.last_token == decl || 337 ps.last_token == lbrace || ps.last_token == rbrace)) { 338 ps.its_a_keyword = true; 339 ps.last_u_d = true; 340 last_code = decl; 341 return decl; 342 } 343 if (last_code == decl) /* if this is a declared variable, then 344 * following sign is unary */ 345 ps.last_u_d = true; /* will make "int a -1" work */ 346 last_code = ident; 347 return (ident); /* the ident is not in the list */ 348 } /* end of procesing for alpanum character */ 349 350 /* Scan a non-alphanumeric token */ 351 352 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 353 * moved here */ 354 *e_token = '\0'; 355 if (++buf_ptr >= buf_end) 356 fill_buffer(); 357 358 switch (*token) { 359 case '\n': 360 unary_delim = ps.last_u_d; 361 ps.last_nl = true; /* remember that we just had a newline */ 362 code = (had_eof ? 0 : newline); 363 364 /* 365 * if data has been exhausted, the newline is a dummy, and we should 366 * return code to stop 367 */ 368 break; 369 370 case '\'': /* start of quoted character */ 371 case '"': /* start of string */ 372 qchar = *token; 373 if (troff) { 374 e_token[-1] = '`'; 375 if (qchar == '"') 376 *e_token++ = '`'; 377 e_token = chfont(&bodyf, &stringf, e_token); 378 } 379 do { /* copy the string */ 380 while (1) { /* move one character or [/<char>]<char> */ 381 if (*buf_ptr == '\n') { 382 printf("%d: Unterminated literal\n", line_no); 383 goto stop_lit; 384 } 385 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 386 * since CHECK_SIZE guarantees that there 387 * are at least 5 entries left */ 388 *e_token = *buf_ptr++; 389 if (buf_ptr >= buf_end) 390 fill_buffer(); 391 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 392 if (*buf_ptr == '\n') /* check for escaped newline */ 393 ++line_no; 394 if (troff) { 395 *++e_token = BACKSLASH; 396 if (*buf_ptr == BACKSLASH) 397 *++e_token = BACKSLASH; 398 } 399 *++e_token = *buf_ptr++; 400 ++e_token; /* we must increment this again because we 401 * copied two chars */ 402 if (buf_ptr >= buf_end) 403 fill_buffer(); 404 } 405 else 406 break; /* we copied one character */ 407 } /* end of while (1) */ 408 } while (*e_token++ != qchar); 409 if (troff) { 410 e_token = chfont(&stringf, &bodyf, e_token - 1); 411 if (qchar == '"') 412 *e_token++ = '\''; 413 } 414 stop_lit: 415 code = ident; 416 break; 417 418 case ('('): 419 case ('['): 420 unary_delim = true; 421 code = lparen; 422 break; 423 424 case (')'): 425 case (']'): 426 code = rparen; 427 break; 428 429 case '#': 430 unary_delim = ps.last_u_d; 431 code = preesc; 432 break; 433 434 case '?': 435 unary_delim = true; 436 code = question; 437 break; 438 439 case (':'): 440 code = colon; 441 unary_delim = true; 442 break; 443 444 case (';'): 445 unary_delim = true; 446 code = semicolon; 447 break; 448 449 case ('{'): 450 unary_delim = true; 451 452 /* 453 * if (ps.in_or_st) ps.block_init = 1; 454 */ 455 /* ? code = ps.block_init ? lparen : lbrace; */ 456 code = lbrace; 457 break; 458 459 case ('}'): 460 unary_delim = true; 461 /* ? code = ps.block_init ? rparen : rbrace; */ 462 code = rbrace; 463 break; 464 465 case 014: /* a form feed */ 466 unary_delim = ps.last_u_d; 467 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 468 * right */ 469 code = form_feed; 470 break; 471 472 case (','): 473 unary_delim = true; 474 code = comma; 475 break; 476 477 case '.': 478 unary_delim = false; 479 code = period; 480 break; 481 482 case '-': 483 case '+': /* check for -, +, --, ++ */ 484 code = (ps.last_u_d ? unary_op : binary_op); 485 unary_delim = true; 486 487 if (*buf_ptr == token[0]) { 488 /* check for doubled character */ 489 *e_token++ = *buf_ptr++; 490 /* buffer overflow will be checked at end of loop */ 491 if (last_code == ident || last_code == rparen) { 492 code = (ps.last_u_d ? unary_op : postop); 493 /* check for following ++ or -- */ 494 unary_delim = false; 495 } 496 } 497 else if (*buf_ptr == '=') 498 /* check for operator += */ 499 *e_token++ = *buf_ptr++; 500 else if (*buf_ptr == '>') { 501 /* check for operator -> */ 502 *e_token++ = *buf_ptr++; 503 if (!pointer_as_binop) { 504 unary_delim = false; 505 code = unary_op; 506 ps.want_blank = false; 507 } 508 } 509 break; /* buffer overflow will be checked at end of 510 * switch */ 511 512 case '=': 513 if (ps.in_or_st) 514 ps.block_init = 1; 515 #ifdef undef 516 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 517 e_token[-1] = *buf_ptr++; 518 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 519 *e_token++ = *buf_ptr++; 520 *e_token++ = '='; /* Flip =+ to += */ 521 *e_token = 0; 522 } 523 #else 524 if (*buf_ptr == '=') {/* == */ 525 *e_token++ = '='; /* Flip =+ to += */ 526 buf_ptr++; 527 *e_token = 0; 528 } 529 #endif 530 code = binary_op; 531 unary_delim = true; 532 break; 533 /* can drop thru!!! */ 534 535 case '>': 536 case '<': 537 case '!': /* ops like <, <<, <=, !=, etc */ 538 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 539 *e_token++ = *buf_ptr; 540 if (++buf_ptr >= buf_end) 541 fill_buffer(); 542 } 543 if (*buf_ptr == '=') 544 *e_token++ = *buf_ptr++; 545 code = (ps.last_u_d ? unary_op : binary_op); 546 unary_delim = true; 547 break; 548 549 default: 550 if (token[0] == '/' && *buf_ptr == '*') { 551 /* it is start of comment */ 552 *e_token++ = '*'; 553 554 if (++buf_ptr >= buf_end) 555 fill_buffer(); 556 557 code = comment; 558 unary_delim = ps.last_u_d; 559 break; 560 } 561 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 562 /* 563 * handle ||, &&, etc, and also things as in int *****i 564 */ 565 *e_token++ = *buf_ptr; 566 if (++buf_ptr >= buf_end) 567 fill_buffer(); 568 } 569 code = (ps.last_u_d ? unary_op : binary_op); 570 unary_delim = true; 571 572 573 } /* end of switch */ 574 if (code != newline) { 575 l_struct = false; 576 last_code = code; 577 } 578 if (buf_ptr >= buf_end) /* check for input buffer empty */ 579 fill_buffer(); 580 ps.last_u_d = unary_delim; 581 *e_token = '\0'; /* null terminate the token */ 582 return (code); 583 } 584 585 /* 586 * Add the given keyword to the keyword table, using val as the keyword type 587 */ 588 void 589 addkey(char *key, int val) 590 { 591 struct templ *p = specials; 592 while (p->rwd) 593 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 594 return; 595 else 596 p++; 597 if (p >= specials + sizeof specials / sizeof specials[0]) 598 return; /* For now, table overflows are silently 599 * ignored */ 600 p->rwd = key; 601 p->rwcode = val; 602 p[1].rwd = 0; 603 p[1].rwcode = 0; 604 } 605