1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 38 #endif /* not lint */ 39 40 /* 41 * Here we have the token scanner for indent. It scans off one token and puts 42 * it in the global variable "token". It returns a code, indicating the type 43 * of token scanned. 44 */ 45 46 #include <stdio.h> 47 #include <ctype.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include "indent_globs.h" 51 #include "indent_codes.h" 52 53 #define alphanum 1 54 #define opchar 3 55 56 struct templ { 57 char *rwd; 58 int rwcode; 59 }; 60 61 struct templ specials[100] = 62 { 63 "switch", 1, 64 "case", 2, 65 "break", 0, 66 "struct", 3, 67 "union", 3, 68 "enum", 3, 69 "default", 2, 70 "int", 4, 71 "char", 4, 72 "float", 4, 73 "double", 4, 74 "long", 4, 75 "short", 4, 76 "typdef", 4, 77 "unsigned", 4, 78 "register", 4, 79 "static", 4, 80 "global", 4, 81 "extern", 4, 82 "void", 4, 83 "goto", 0, 84 "return", 0, 85 "if", 5, 86 "while", 5, 87 "for", 5, 88 "else", 6, 89 "do", 6, 90 "sizeof", 7, 91 0, 0 92 }; 93 94 char chartype[128] = 95 { /* this is used to facilitate the decision of 96 * what type (alphanumeric, operator) each 97 * character is */ 98 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 3, 0, 0, 1, 3, 3, 0, 103 0, 0, 3, 3, 0, 3, 0, 3, 104 1, 1, 1, 1, 1, 1, 1, 1, 105 1, 1, 0, 0, 3, 3, 3, 3, 106 0, 1, 1, 1, 1, 1, 1, 1, 107 1, 1, 1, 1, 1, 1, 1, 1, 108 1, 1, 1, 1, 1, 1, 1, 1, 109 1, 1, 1, 0, 0, 0, 3, 1, 110 0, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 1, 1, 1, 1, 1, 1, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 0, 3, 0, 3, 0 114 }; 115 116 117 118 119 int 120 lexi() 121 { 122 int unary_delim; /* this is set to 1 if the current token 123 * 124 * forces a following operator to be unary */ 125 static int last_code; /* the last token type returned */ 126 static int l_struct; /* set to 1 if the last token was 'struct' */ 127 int code; /* internal code to be returned */ 128 char qchar; /* the delimiter character for a string */ 129 130 e_token = s_token; /* point to start of place to save token */ 131 unary_delim = false; 132 ps.col_1 = ps.last_nl; /* tell world that this token started in 133 * column 1 iff the last thing scanned was nl */ 134 ps.last_nl = false; 135 136 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 137 ps.col_1 = false; /* leading blanks imply token is not in column 138 * 1 */ 139 if (++buf_ptr >= buf_end) 140 fill_buffer(); 141 } 142 143 /* Scan an alphanumeric token */ 144 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 145 /* 146 * we have a character or number 147 */ 148 register char *j; /* used for searching thru list of 149 * 150 * reserved words */ 151 register struct templ *p; 152 153 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 154 int seendot = 0, 155 seenexp = 0, 156 seensfx = 0; 157 if (*buf_ptr == '0' && 158 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 159 *e_token++ = *buf_ptr++; 160 *e_token++ = *buf_ptr++; 161 while (isxdigit(*buf_ptr)) { 162 CHECK_SIZE_TOKEN; 163 *e_token++ = *buf_ptr++; 164 } 165 } 166 else 167 while (1) { 168 if (*buf_ptr == '.') 169 if (seendot) 170 break; 171 else 172 seendot++; 173 CHECK_SIZE_TOKEN; 174 *e_token++ = *buf_ptr++; 175 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 176 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 177 break; 178 else { 179 seenexp++; 180 seendot++; 181 CHECK_SIZE_TOKEN; 182 *e_token++ = *buf_ptr++; 183 if (*buf_ptr == '+' || *buf_ptr == '-') 184 *e_token++ = *buf_ptr++; 185 } 186 } 187 while (1) { 188 if (!(seensfx & 1) && 189 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 190 CHECK_SIZE_TOKEN; 191 *e_token++ = *buf_ptr++; 192 seensfx |= 1; 193 continue; 194 } 195 if (!(seensfx & 2) && 196 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 197 CHECK_SIZE_TOKEN; 198 if (buf_ptr[1] == buf_ptr[0]) 199 *e_token++ = *buf_ptr++; 200 *e_token++ = *buf_ptr++; 201 seensfx |= 2; 202 continue; 203 } 204 break; 205 } 206 } 207 else 208 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 209 CHECK_SIZE_TOKEN; 210 *e_token++ = *buf_ptr++; 211 if (buf_ptr >= buf_end) 212 fill_buffer(); 213 } 214 *e_token++ = '\0'; 215 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 216 if (++buf_ptr >= buf_end) 217 fill_buffer(); 218 } 219 ps.its_a_keyword = false; 220 ps.sizeof_keyword = false; 221 if (l_struct) { /* if last token was 'struct', then this token 222 * should be treated as a declaration */ 223 l_struct = false; 224 last_code = ident; 225 ps.last_u_d = true; 226 return (decl); 227 } 228 ps.last_u_d = false; /* Operator after indentifier is binary */ 229 last_code = ident; /* Remember that this is the code we will 230 * return */ 231 232 /* 233 * This loop will check if the token is a keyword. 234 */ 235 for (p = specials; (j = p->rwd) != 0; p++) { 236 register char *p = s_token; /* point at scanned token */ 237 if (*j++ != *p++ || *j++ != *p++) 238 continue; /* This test depends on the fact that 239 * identifiers are always at least 1 character 240 * long (ie. the first two bytes of the 241 * identifier are always meaningful) */ 242 if (p[-1] == 0) 243 break; /* If its a one-character identifier */ 244 while (*p++ == *j) 245 if (*j++ == 0) 246 goto found_keyword; /* I wish that C had a multi-level 247 * break... */ 248 } 249 if (p->rwd) { /* we have a keyword */ 250 found_keyword: 251 ps.its_a_keyword = true; 252 ps.last_u_d = true; 253 switch (p->rwcode) { 254 case 1: /* it is a switch */ 255 return (swstmt); 256 case 2: /* a case or default */ 257 return (casestmt); 258 259 case 3: /* a "struct" */ 260 if (ps.p_l_follow) 261 break; /* inside parens: cast */ 262 l_struct = true; 263 264 /* 265 * Next time around, we will want to know that we have had a 266 * 'struct' 267 */ 268 case 4: /* one of the declaration keywords */ 269 if (ps.p_l_follow) { 270 ps.cast_mask |= 1 << ps.p_l_follow; 271 break; /* inside parens: cast */ 272 } 273 last_code = decl; 274 return (decl); 275 276 case 5: /* if, while, for */ 277 return (sp_paren); 278 279 case 6: /* do, else */ 280 return (sp_nparen); 281 282 case 7: 283 ps.sizeof_keyword = true; 284 default: /* all others are treated like any other 285 * identifier */ 286 return (ident); 287 } /* end of switch */ 288 } /* end of if (found_it) */ 289 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 290 register char *tp = buf_ptr; 291 while (tp < buf_end) 292 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 293 goto not_proc; 294 strncpy(ps.procname, token, sizeof ps.procname - 1); 295 ps.in_parameter_declaration = 1; 296 rparen_count = 1; 297 not_proc:; 298 } 299 /* 300 * The following hack attempts to guess whether or not the current 301 * token is in fact a declaration keyword -- one that has been 302 * typedefd 303 */ 304 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 305 && !ps.p_l_follow 306 && !ps.block_init 307 && (ps.last_token == rparen || ps.last_token == semicolon || 308 ps.last_token == decl || 309 ps.last_token == lbrace || ps.last_token == rbrace)) { 310 ps.its_a_keyword = true; 311 ps.last_u_d = true; 312 last_code = decl; 313 return decl; 314 } 315 if (last_code == decl) /* if this is a declared variable, then 316 * following sign is unary */ 317 ps.last_u_d = true; /* will make "int a -1" work */ 318 last_code = ident; 319 return (ident); /* the ident is not in the list */ 320 } /* end of procesing for alpanum character */ 321 322 /* Scan a non-alphanumeric token */ 323 324 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 325 * moved here */ 326 *e_token = '\0'; 327 if (++buf_ptr >= buf_end) 328 fill_buffer(); 329 330 switch (*token) { 331 case '\n': 332 unary_delim = ps.last_u_d; 333 ps.last_nl = true; /* remember that we just had a newline */ 334 code = (had_eof ? 0 : newline); 335 336 /* 337 * if data has been exausted, the newline is a dummy, and we should 338 * return code to stop 339 */ 340 break; 341 342 case '\'': /* start of quoted character */ 343 case '"': /* start of string */ 344 qchar = *token; 345 if (troff) { 346 e_token[-1] = '`'; 347 if (qchar == '"') 348 *e_token++ = '`'; 349 e_token = chfont(&bodyf, &stringf, e_token); 350 } 351 do { /* copy the string */ 352 while (1) { /* move one character or [/<char>]<char> */ 353 if (*buf_ptr == '\n') { 354 printf("%d: Unterminated literal\n", line_no); 355 goto stop_lit; 356 } 357 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 358 * since CHECK_SIZE guarantees that there 359 * are at least 5 entries left */ 360 *e_token = *buf_ptr++; 361 if (buf_ptr >= buf_end) 362 fill_buffer(); 363 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 364 if (*buf_ptr == '\n') /* check for escaped newline */ 365 ++line_no; 366 if (troff) { 367 *++e_token = BACKSLASH; 368 if (*buf_ptr == BACKSLASH) 369 *++e_token = BACKSLASH; 370 } 371 *++e_token = *buf_ptr++; 372 ++e_token; /* we must increment this again because we 373 * copied two chars */ 374 if (buf_ptr >= buf_end) 375 fill_buffer(); 376 } 377 else 378 break; /* we copied one character */ 379 } /* end of while (1) */ 380 } while (*e_token++ != qchar); 381 if (troff) { 382 e_token = chfont(&stringf, &bodyf, e_token - 1); 383 if (qchar == '"') 384 *e_token++ = '\''; 385 } 386 stop_lit: 387 code = ident; 388 break; 389 390 case ('('): 391 case ('['): 392 unary_delim = true; 393 code = lparen; 394 break; 395 396 case (')'): 397 case (']'): 398 code = rparen; 399 break; 400 401 case '#': 402 unary_delim = ps.last_u_d; 403 code = preesc; 404 break; 405 406 case '?': 407 unary_delim = true; 408 code = question; 409 break; 410 411 case (':'): 412 code = colon; 413 unary_delim = true; 414 break; 415 416 case (';'): 417 unary_delim = true; 418 code = semicolon; 419 break; 420 421 case ('{'): 422 unary_delim = true; 423 424 /* 425 * if (ps.in_or_st) ps.block_init = 1; 426 */ 427 /* ? code = ps.block_init ? lparen : lbrace; */ 428 code = lbrace; 429 break; 430 431 case ('}'): 432 unary_delim = true; 433 /* ? code = ps.block_init ? rparen : rbrace; */ 434 code = rbrace; 435 break; 436 437 case 014: /* a form feed */ 438 unary_delim = ps.last_u_d; 439 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 440 * right */ 441 code = form_feed; 442 break; 443 444 case (','): 445 unary_delim = true; 446 code = comma; 447 break; 448 449 case '.': 450 unary_delim = false; 451 code = period; 452 break; 453 454 case '-': 455 case '+': /* check for -, +, --, ++ */ 456 code = (ps.last_u_d ? unary_op : binary_op); 457 unary_delim = true; 458 459 if (*buf_ptr == token[0]) { 460 /* check for doubled character */ 461 *e_token++ = *buf_ptr++; 462 /* buffer overflow will be checked at end of loop */ 463 if (last_code == ident || last_code == rparen) { 464 code = (ps.last_u_d ? unary_op : postop); 465 /* check for following ++ or -- */ 466 unary_delim = false; 467 } 468 } 469 else if (*buf_ptr == '=') 470 /* check for operator += */ 471 *e_token++ = *buf_ptr++; 472 else if (*buf_ptr == '>') { 473 /* check for operator -> */ 474 *e_token++ = *buf_ptr++; 475 if (!pointer_as_binop) { 476 unary_delim = false; 477 code = unary_op; 478 ps.want_blank = false; 479 } 480 } 481 break; /* buffer overflow will be checked at end of 482 * switch */ 483 484 case '=': 485 if (ps.in_or_st) 486 ps.block_init = 1; 487 #ifdef undef 488 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 489 e_token[-1] = *buf_ptr++; 490 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 491 *e_token++ = *buf_ptr++; 492 *e_token++ = '='; /* Flip =+ to += */ 493 *e_token = 0; 494 } 495 #else 496 if (*buf_ptr == '=') {/* == */ 497 *e_token++ = '='; /* Flip =+ to += */ 498 buf_ptr++; 499 *e_token = 0; 500 } 501 #endif 502 code = binary_op; 503 unary_delim = true; 504 break; 505 /* can drop thru!!! */ 506 507 case '>': 508 case '<': 509 case '!': /* ops like <, <<, <=, !=, etc */ 510 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 511 *e_token++ = *buf_ptr; 512 if (++buf_ptr >= buf_end) 513 fill_buffer(); 514 } 515 if (*buf_ptr == '=') 516 *e_token++ = *buf_ptr++; 517 code = (ps.last_u_d ? unary_op : binary_op); 518 unary_delim = true; 519 break; 520 521 default: 522 if (token[0] == '/' && *buf_ptr == '*') { 523 /* it is start of comment */ 524 *e_token++ = '*'; 525 526 if (++buf_ptr >= buf_end) 527 fill_buffer(); 528 529 code = comment; 530 unary_delim = ps.last_u_d; 531 break; 532 } 533 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 534 /* 535 * handle ||, &&, etc, and also things as in int *****i 536 */ 537 *e_token++ = *buf_ptr; 538 if (++buf_ptr >= buf_end) 539 fill_buffer(); 540 } 541 code = (ps.last_u_d ? unary_op : binary_op); 542 unary_delim = true; 543 544 545 } /* end of switch */ 546 if (code != newline) { 547 l_struct = false; 548 last_code = code; 549 } 550 if (buf_ptr >= buf_end) /* check for input buffer empty */ 551 fill_buffer(); 552 ps.last_u_d = unary_delim; 553 *e_token = '\0'; /* null terminate the token */ 554 return (code); 555 } 556 557 /* 558 * Add the given keyword to the keyword table, using val as the keyword type 559 */ 560 addkey(key, val) 561 char *key; 562 { 563 register struct templ *p = specials; 564 while (p->rwd) 565 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 566 return; 567 else 568 p++; 569 if (p >= specials + sizeof specials / sizeof specials[0]) 570 return; /* For now, table overflows are silently 571 * ignored */ 572 p->rwd = key; 573 p->rwcode = val; 574 p[1].rwd = 0; 575 p[1].rwcode = 0; 576 return; 577 } 578