1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 38 static const char rcsid[] = 39 "$FreeBSD$"; 40 #endif /* not lint */ 41 42 /* 43 * Here we have the token scanner for indent. It scans off one token and puts 44 * it in the global variable "token". It returns a code, indicating the type 45 * of token scanned. 46 */ 47 48 #include <stdio.h> 49 #include <ctype.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include "indent_globs.h" 53 #include "indent_codes.h" 54 55 #define alphanum 1 56 #define opchar 3 57 58 struct templ { 59 char *rwd; 60 int rwcode; 61 }; 62 63 struct templ specials[1000] = 64 { 65 "switch", 1, 66 "case", 2, 67 "break", 0, 68 "struct", 3, 69 "union", 3, 70 "enum", 3, 71 "default", 2, 72 "int", 4, 73 "char", 4, 74 "float", 4, 75 "double", 4, 76 "long", 4, 77 "short", 4, 78 "typdef", 4, 79 "unsigned", 4, 80 "register", 4, 81 "static", 4, 82 "global", 4, 83 "extern", 4, 84 "void", 4, 85 "goto", 0, 86 "return", 0, 87 "if", 5, 88 "while", 5, 89 "for", 5, 90 "else", 6, 91 "do", 6, 92 "sizeof", 7, 93 "const", 9, 94 "volatile", 9, 95 0, 0 96 }; 97 98 char chartype[128] = 99 { /* this is used to facilitate the decision of 100 * what type (alphanumeric, operator) each 101 * character is */ 102 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 3, 0, 0, 1, 3, 3, 0, 107 0, 0, 3, 3, 0, 3, 0, 3, 108 1, 1, 1, 1, 1, 1, 1, 1, 109 1, 1, 0, 0, 3, 3, 3, 3, 110 0, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 1, 1, 1, 1, 1, 1, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 0, 0, 0, 3, 1, 114 0, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 0, 3, 0, 3, 0 118 }; 119 120 121 122 123 int 124 lexi() 125 { 126 int unary_delim; /* this is set to 1 if the current token 127 * 128 * forces a following operator to be unary */ 129 static int last_code; /* the last token type returned */ 130 static int l_struct; /* set to 1 if the last token was 'struct' */ 131 int code; /* internal code to be returned */ 132 char qchar; /* the delimiter character for a string */ 133 134 e_token = s_token; /* point to start of place to save token */ 135 unary_delim = false; 136 ps.col_1 = ps.last_nl; /* tell world that this token started in 137 * column 1 iff the last thing scanned was nl */ 138 ps.last_nl = false; 139 140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 141 ps.col_1 = false; /* leading blanks imply token is not in column 142 * 1 */ 143 if (++buf_ptr >= buf_end) 144 fill_buffer(); 145 } 146 147 /* Scan an alphanumeric token */ 148 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 149 /* 150 * we have a character or number 151 */ 152 register char *j; /* used for searching thru list of 153 * 154 * reserved words */ 155 register struct templ *p; 156 157 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 158 int seendot = 0, 159 seenexp = 0, 160 seensfx = 0; 161 if (*buf_ptr == '0' && 162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 163 *e_token++ = *buf_ptr++; 164 *e_token++ = *buf_ptr++; 165 while (isxdigit(*buf_ptr)) { 166 CHECK_SIZE_TOKEN; 167 *e_token++ = *buf_ptr++; 168 } 169 } 170 else 171 while (1) { 172 if (*buf_ptr == '.') 173 if (seendot) 174 break; 175 else 176 seendot++; 177 CHECK_SIZE_TOKEN; 178 *e_token++ = *buf_ptr++; 179 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 180 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 181 break; 182 else { 183 seenexp++; 184 seendot++; 185 CHECK_SIZE_TOKEN; 186 *e_token++ = *buf_ptr++; 187 if (*buf_ptr == '+' || *buf_ptr == '-') 188 *e_token++ = *buf_ptr++; 189 } 190 } 191 while (1) { 192 if (!(seensfx & 1) && 193 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 194 CHECK_SIZE_TOKEN; 195 *e_token++ = *buf_ptr++; 196 seensfx |= 1; 197 continue; 198 } 199 if (!(seensfx & 2) && 200 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 201 CHECK_SIZE_TOKEN; 202 if (buf_ptr[1] == buf_ptr[0]) 203 *e_token++ = *buf_ptr++; 204 *e_token++ = *buf_ptr++; 205 seensfx |= 2; 206 continue; 207 } 208 break; 209 } 210 } 211 else 212 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 213 CHECK_SIZE_TOKEN; 214 *e_token++ = *buf_ptr++; 215 if (buf_ptr >= buf_end) 216 fill_buffer(); 217 } 218 *e_token++ = '\0'; 219 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 220 if (++buf_ptr >= buf_end) 221 fill_buffer(); 222 } 223 ps.its_a_keyword = false; 224 ps.sizeof_keyword = false; 225 if (l_struct) { /* if last token was 'struct', then this token 226 * should be treated as a declaration */ 227 l_struct = false; 228 last_code = ident; 229 ps.last_u_d = true; 230 return (decl); 231 } 232 ps.last_u_d = false; /* Operator after indentifier is binary */ 233 last_code = ident; /* Remember that this is the code we will 234 * return */ 235 236 /* 237 * This loop will check if the token is a keyword. 238 */ 239 for (p = specials; (j = p->rwd) != 0; p++) { 240 register char *p = s_token; /* point at scanned token */ 241 if (*j++ != *p++ || *j++ != *p++) 242 continue; /* This test depends on the fact that 243 * identifiers are always at least 1 character 244 * long (ie. the first two bytes of the 245 * identifier are always meaningful) */ 246 if (p[-1] == 0) 247 break; /* If its a one-character identifier */ 248 while (*p++ == *j) 249 if (*j++ == 0) 250 goto found_keyword; /* I wish that C had a multi-level 251 * break... */ 252 } 253 if (p->rwd) { /* we have a keyword */ 254 found_keyword: 255 ps.its_a_keyword = true; 256 ps.last_u_d = true; 257 switch (p->rwcode) { 258 case 1: /* it is a switch */ 259 return (swstmt); 260 case 2: /* a case or default */ 261 return (casestmt); 262 263 case 3: /* a "struct" */ 264 if (ps.p_l_follow) 265 break; /* inside parens: cast */ 266 /* 267 * Next time around, we may want to know that we have had a 268 * 'struct' 269 */ 270 l_struct = true; 271 272 /* 273 * Fall through to test for a cast, function prototype or 274 * sizeof(). 275 */ 276 case 4: /* one of the declaration keywords */ 277 if (ps.p_l_follow) { 278 ps.cast_mask |= 1 << ps.p_l_follow; 279 280 /* 281 * Forget that we saw `struct' if we're in a sizeof(). 282 */ 283 if (ps.sizeof_mask) 284 l_struct = false; 285 286 break; /* inside parens: cast, prototype or sizeof() */ 287 } 288 last_code = decl; 289 return (decl); 290 291 case 5: /* if, while, for */ 292 return (sp_paren); 293 294 case 6: /* do, else */ 295 return (sp_nparen); 296 297 case 7: 298 ps.sizeof_keyword = true; 299 default: /* all others are treated like any other 300 * identifier */ 301 return (ident); 302 } /* end of switch */ 303 } /* end of if (found_it) */ 304 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 305 register char *tp = buf_ptr; 306 while (tp < buf_end) 307 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 308 goto not_proc; 309 strncpy(ps.procname, token, sizeof ps.procname - 1); 310 ps.in_parameter_declaration = 1; 311 rparen_count = 1; 312 not_proc:; 313 } 314 /* 315 * The following hack attempts to guess whether or not the current 316 * token is in fact a declaration keyword -- one that has been 317 * typedefd 318 */ 319 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 320 && !ps.p_l_follow 321 && !ps.block_init 322 && (ps.last_token == rparen || ps.last_token == semicolon || 323 ps.last_token == decl || 324 ps.last_token == lbrace || ps.last_token == rbrace)) { 325 ps.its_a_keyword = true; 326 ps.last_u_d = true; 327 last_code = decl; 328 return decl; 329 } 330 if (last_code == decl) /* if this is a declared variable, then 331 * following sign is unary */ 332 ps.last_u_d = true; /* will make "int a -1" work */ 333 last_code = ident; 334 return (ident); /* the ident is not in the list */ 335 } /* end of procesing for alpanum character */ 336 337 /* Scan a non-alphanumeric token */ 338 339 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 340 * moved here */ 341 *e_token = '\0'; 342 if (++buf_ptr >= buf_end) 343 fill_buffer(); 344 345 switch (*token) { 346 case '\n': 347 unary_delim = ps.last_u_d; 348 ps.last_nl = true; /* remember that we just had a newline */ 349 code = (had_eof ? 0 : newline); 350 351 /* 352 * if data has been exausted, the newline is a dummy, and we should 353 * return code to stop 354 */ 355 break; 356 357 case '\'': /* start of quoted character */ 358 case '"': /* start of string */ 359 qchar = *token; 360 if (troff) { 361 e_token[-1] = '`'; 362 if (qchar == '"') 363 *e_token++ = '`'; 364 e_token = chfont(&bodyf, &stringf, e_token); 365 } 366 do { /* copy the string */ 367 while (1) { /* move one character or [/<char>]<char> */ 368 if (*buf_ptr == '\n') { 369 printf("%d: Unterminated literal\n", line_no); 370 goto stop_lit; 371 } 372 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 373 * since CHECK_SIZE guarantees that there 374 * are at least 5 entries left */ 375 *e_token = *buf_ptr++; 376 if (buf_ptr >= buf_end) 377 fill_buffer(); 378 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 379 if (*buf_ptr == '\n') /* check for escaped newline */ 380 ++line_no; 381 if (troff) { 382 *++e_token = BACKSLASH; 383 if (*buf_ptr == BACKSLASH) 384 *++e_token = BACKSLASH; 385 } 386 *++e_token = *buf_ptr++; 387 ++e_token; /* we must increment this again because we 388 * copied two chars */ 389 if (buf_ptr >= buf_end) 390 fill_buffer(); 391 } 392 else 393 break; /* we copied one character */ 394 } /* end of while (1) */ 395 } while (*e_token++ != qchar); 396 if (troff) { 397 e_token = chfont(&stringf, &bodyf, e_token - 1); 398 if (qchar == '"') 399 *e_token++ = '\''; 400 } 401 stop_lit: 402 code = ident; 403 break; 404 405 case ('('): 406 case ('['): 407 unary_delim = true; 408 code = lparen; 409 break; 410 411 case (')'): 412 case (']'): 413 code = rparen; 414 break; 415 416 case '#': 417 unary_delim = ps.last_u_d; 418 code = preesc; 419 break; 420 421 case '?': 422 unary_delim = true; 423 code = question; 424 break; 425 426 case (':'): 427 code = colon; 428 unary_delim = true; 429 break; 430 431 case (';'): 432 unary_delim = true; 433 code = semicolon; 434 break; 435 436 case ('{'): 437 unary_delim = true; 438 439 /* 440 * if (ps.in_or_st) ps.block_init = 1; 441 */ 442 /* ? code = ps.block_init ? lparen : lbrace; */ 443 code = lbrace; 444 break; 445 446 case ('}'): 447 unary_delim = true; 448 /* ? code = ps.block_init ? rparen : rbrace; */ 449 code = rbrace; 450 break; 451 452 case 014: /* a form feed */ 453 unary_delim = ps.last_u_d; 454 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 455 * right */ 456 code = form_feed; 457 break; 458 459 case (','): 460 unary_delim = true; 461 code = comma; 462 break; 463 464 case '.': 465 unary_delim = false; 466 code = period; 467 break; 468 469 case '-': 470 case '+': /* check for -, +, --, ++ */ 471 code = (ps.last_u_d ? unary_op : binary_op); 472 unary_delim = true; 473 474 if (*buf_ptr == token[0]) { 475 /* check for doubled character */ 476 *e_token++ = *buf_ptr++; 477 /* buffer overflow will be checked at end of loop */ 478 if (last_code == ident || last_code == rparen) { 479 code = (ps.last_u_d ? unary_op : postop); 480 /* check for following ++ or -- */ 481 unary_delim = false; 482 } 483 } 484 else if (*buf_ptr == '=') 485 /* check for operator += */ 486 *e_token++ = *buf_ptr++; 487 else if (*buf_ptr == '>') { 488 /* check for operator -> */ 489 *e_token++ = *buf_ptr++; 490 if (!pointer_as_binop) { 491 unary_delim = false; 492 code = unary_op; 493 ps.want_blank = false; 494 } 495 } 496 break; /* buffer overflow will be checked at end of 497 * switch */ 498 499 case '=': 500 if (ps.in_or_st) 501 ps.block_init = 1; 502 #ifdef undef 503 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 504 e_token[-1] = *buf_ptr++; 505 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 506 *e_token++ = *buf_ptr++; 507 *e_token++ = '='; /* Flip =+ to += */ 508 *e_token = 0; 509 } 510 #else 511 if (*buf_ptr == '=') {/* == */ 512 *e_token++ = '='; /* Flip =+ to += */ 513 buf_ptr++; 514 *e_token = 0; 515 } 516 #endif 517 code = binary_op; 518 unary_delim = true; 519 break; 520 /* can drop thru!!! */ 521 522 case '>': 523 case '<': 524 case '!': /* ops like <, <<, <=, !=, etc */ 525 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 526 *e_token++ = *buf_ptr; 527 if (++buf_ptr >= buf_end) 528 fill_buffer(); 529 } 530 if (*buf_ptr == '=') 531 *e_token++ = *buf_ptr++; 532 code = (ps.last_u_d ? unary_op : binary_op); 533 unary_delim = true; 534 break; 535 536 default: 537 if (token[0] == '/' && *buf_ptr == '*') { 538 /* it is start of comment */ 539 *e_token++ = '*'; 540 541 if (++buf_ptr >= buf_end) 542 fill_buffer(); 543 544 code = comment; 545 unary_delim = ps.last_u_d; 546 break; 547 } 548 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 549 /* 550 * handle ||, &&, etc, and also things as in int *****i 551 */ 552 *e_token++ = *buf_ptr; 553 if (++buf_ptr >= buf_end) 554 fill_buffer(); 555 } 556 code = (ps.last_u_d ? unary_op : binary_op); 557 unary_delim = true; 558 559 560 } /* end of switch */ 561 if (code != newline) { 562 l_struct = false; 563 last_code = code; 564 } 565 if (buf_ptr >= buf_end) /* check for input buffer empty */ 566 fill_buffer(); 567 ps.last_u_d = unary_delim; 568 *e_token = '\0'; /* null terminate the token */ 569 return (code); 570 } 571 572 /* 573 * Add the given keyword to the keyword table, using val as the keyword type 574 */ 575 addkey(key, val) 576 char *key; 577 { 578 register struct templ *p = specials; 579 while (p->rwd) 580 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 581 return; 582 else 583 p++; 584 if (p >= specials + sizeof specials / sizeof specials[0]) 585 return; /* For now, table overflows are silently 586 * ignored */ 587 p->rwd = key; 588 p->rwcode = val; 589 p[1].rwd = 0; 590 p[1].rwcode = 0; 591 return; 592 } 593