1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 38 #endif /* not lint */ 39 40 /* 41 * Here we have the token scanner for indent. It scans off one token and puts 42 * it in the global variable "token". It returns a code, indicating the type 43 * of token scanned. 44 */ 45 46 #include <stdio.h> 47 #include <ctype.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include "indent_globs.h" 51 #include "indent_codes.h" 52 53 #define alphanum 1 54 #define opchar 3 55 56 struct templ { 57 char *rwd; 58 int rwcode; 59 }; 60 61 struct templ specials[100] = 62 { 63 "switch", 1, 64 "case", 2, 65 "break", 0, 66 "struct", 3, 67 "union", 3, 68 "enum", 3, 69 "default", 2, 70 "int", 4, 71 "char", 4, 72 "float", 4, 73 "double", 4, 74 "long", 4, 75 "short", 4, 76 "typdef", 4, 77 "unsigned", 4, 78 "register", 4, 79 "static", 4, 80 "global", 4, 81 "extern", 4, 82 "void", 4, 83 "goto", 0, 84 "return", 0, 85 "if", 5, 86 "while", 5, 87 "for", 5, 88 "else", 6, 89 "do", 6, 90 "sizeof", 7, 91 0, 0 92 }; 93 94 char chartype[128] = 95 { /* this is used to facilitate the decision of 96 * what type (alphanumeric, operator) each 97 * character is */ 98 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 3, 0, 0, 1, 3, 3, 0, 103 0, 0, 3, 3, 0, 3, 0, 3, 104 1, 1, 1, 1, 1, 1, 1, 1, 105 1, 1, 0, 0, 3, 3, 3, 3, 106 0, 1, 1, 1, 1, 1, 1, 1, 107 1, 1, 1, 1, 1, 1, 1, 1, 108 1, 1, 1, 1, 1, 1, 1, 1, 109 1, 1, 1, 0, 0, 0, 3, 1, 110 0, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 1, 1, 1, 1, 1, 1, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 0, 3, 0, 3, 0 114 }; 115 116 117 118 119 int 120 lexi() 121 { 122 int unary_delim; /* this is set to 1 if the current token 123 * 124 * forces a following operator to be unary */ 125 static int last_code; /* the last token type returned */ 126 static int l_struct; /* set to 1 if the last token was 'struct' */ 127 int code; /* internal code to be returned */ 128 char qchar; /* the delimiter character for a string */ 129 130 e_token = s_token; /* point to start of place to save token */ 131 unary_delim = false; 132 ps.col_1 = ps.last_nl; /* tell world that this token started in 133 * column 1 iff the last thing scanned was nl */ 134 ps.last_nl = false; 135 136 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 137 ps.col_1 = false; /* leading blanks imply token is not in column 138 * 1 */ 139 if (++buf_ptr >= buf_end) 140 fill_buffer(); 141 } 142 143 /* Scan an alphanumeric token */ 144 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 145 /* 146 * we have a character or number 147 */ 148 register char *j; /* used for searching thru list of 149 * 150 * reserved words */ 151 register struct templ *p; 152 153 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 154 int seendot = 0, 155 seenexp = 0; 156 if (*buf_ptr == '0' && 157 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 158 *e_token++ = *buf_ptr++; 159 *e_token++ = *buf_ptr++; 160 while (isxdigit(*buf_ptr)) { 161 CHECK_SIZE_TOKEN; 162 *e_token++ = *buf_ptr++; 163 } 164 } 165 else 166 while (1) { 167 if (*buf_ptr == '.') 168 if (seendot) 169 break; 170 else 171 seendot++; 172 CHECK_SIZE_TOKEN; 173 *e_token++ = *buf_ptr++; 174 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 175 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 176 break; 177 else { 178 seenexp++; 179 seendot++; 180 CHECK_SIZE_TOKEN; 181 *e_token++ = *buf_ptr++; 182 if (*buf_ptr == '+' || *buf_ptr == '-') 183 *e_token++ = *buf_ptr++; 184 } 185 } 186 if (*buf_ptr == 'L' || *buf_ptr == 'l') 187 *e_token++ = *buf_ptr++; 188 } 189 else 190 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 191 CHECK_SIZE_TOKEN; 192 *e_token++ = *buf_ptr++; 193 if (buf_ptr >= buf_end) 194 fill_buffer(); 195 } 196 *e_token++ = '\0'; 197 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 198 if (++buf_ptr >= buf_end) 199 fill_buffer(); 200 } 201 ps.its_a_keyword = false; 202 ps.sizeof_keyword = false; 203 if (l_struct) { /* if last token was 'struct', then this token 204 * should be treated as a declaration */ 205 l_struct = false; 206 last_code = ident; 207 ps.last_u_d = true; 208 return (decl); 209 } 210 ps.last_u_d = false; /* Operator after indentifier is binary */ 211 last_code = ident; /* Remember that this is the code we will 212 * return */ 213 214 /* 215 * This loop will check if the token is a keyword. 216 */ 217 for (p = specials; (j = p->rwd) != 0; p++) { 218 register char *p = s_token; /* point at scanned token */ 219 if (*j++ != *p++ || *j++ != *p++) 220 continue; /* This test depends on the fact that 221 * identifiers are always at least 1 character 222 * long (ie. the first two bytes of the 223 * identifier are always meaningful) */ 224 if (p[-1] == 0) 225 break; /* If its a one-character identifier */ 226 while (*p++ == *j) 227 if (*j++ == 0) 228 goto found_keyword; /* I wish that C had a multi-level 229 * break... */ 230 } 231 if (p->rwd) { /* we have a keyword */ 232 found_keyword: 233 ps.its_a_keyword = true; 234 ps.last_u_d = true; 235 switch (p->rwcode) { 236 case 1: /* it is a switch */ 237 return (swstmt); 238 case 2: /* a case or default */ 239 return (casestmt); 240 241 case 3: /* a "struct" */ 242 if (ps.p_l_follow) 243 break; /* inside parens: cast */ 244 l_struct = true; 245 246 /* 247 * Next time around, we will want to know that we have had a 248 * 'struct' 249 */ 250 case 4: /* one of the declaration keywords */ 251 if (ps.p_l_follow) { 252 ps.cast_mask |= 1 << ps.p_l_follow; 253 break; /* inside parens: cast */ 254 } 255 last_code = decl; 256 return (decl); 257 258 case 5: /* if, while, for */ 259 return (sp_paren); 260 261 case 6: /* do, else */ 262 return (sp_nparen); 263 264 case 7: 265 ps.sizeof_keyword = true; 266 default: /* all others are treated like any other 267 * identifier */ 268 return (ident); 269 } /* end of switch */ 270 } /* end of if (found_it) */ 271 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 272 register char *tp = buf_ptr; 273 while (tp < buf_end) 274 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 275 goto not_proc; 276 strncpy(ps.procname, token, sizeof ps.procname - 1); 277 ps.in_parameter_declaration = 1; 278 rparen_count = 1; 279 not_proc:; 280 } 281 /* 282 * The following hack attempts to guess whether or not the current 283 * token is in fact a declaration keyword -- one that has been 284 * typedefd 285 */ 286 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 287 && !ps.p_l_follow 288 && !ps.block_init 289 && (ps.last_token == rparen || ps.last_token == semicolon || 290 ps.last_token == decl || 291 ps.last_token == lbrace || ps.last_token == rbrace)) { 292 ps.its_a_keyword = true; 293 ps.last_u_d = true; 294 last_code = decl; 295 return decl; 296 } 297 if (last_code == decl) /* if this is a declared variable, then 298 * following sign is unary */ 299 ps.last_u_d = true; /* will make "int a -1" work */ 300 last_code = ident; 301 return (ident); /* the ident is not in the list */ 302 } /* end of procesing for alpanum character */ 303 304 /* Scan a non-alphanumeric token */ 305 306 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 307 * moved here */ 308 *e_token = '\0'; 309 if (++buf_ptr >= buf_end) 310 fill_buffer(); 311 312 switch (*token) { 313 case '\n': 314 unary_delim = ps.last_u_d; 315 ps.last_nl = true; /* remember that we just had a newline */ 316 code = (had_eof ? 0 : newline); 317 318 /* 319 * if data has been exausted, the newline is a dummy, and we should 320 * return code to stop 321 */ 322 break; 323 324 case '\'': /* start of quoted character */ 325 case '"': /* start of string */ 326 qchar = *token; 327 if (troff) { 328 e_token[-1] = '`'; 329 if (qchar == '"') 330 *e_token++ = '`'; 331 e_token = chfont(&bodyf, &stringf, e_token); 332 } 333 do { /* copy the string */ 334 while (1) { /* move one character or [/<char>]<char> */ 335 if (*buf_ptr == '\n') { 336 printf("%d: Unterminated literal\n", line_no); 337 goto stop_lit; 338 } 339 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 340 * since CHECK_SIZE guarantees that there 341 * are at least 5 entries left */ 342 *e_token = *buf_ptr++; 343 if (buf_ptr >= buf_end) 344 fill_buffer(); 345 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 346 if (*buf_ptr == '\n') /* check for escaped newline */ 347 ++line_no; 348 if (troff) { 349 *++e_token = BACKSLASH; 350 if (*buf_ptr == BACKSLASH) 351 *++e_token = BACKSLASH; 352 } 353 *++e_token = *buf_ptr++; 354 ++e_token; /* we must increment this again because we 355 * copied two chars */ 356 if (buf_ptr >= buf_end) 357 fill_buffer(); 358 } 359 else 360 break; /* we copied one character */ 361 } /* end of while (1) */ 362 } while (*e_token++ != qchar); 363 if (troff) { 364 e_token = chfont(&stringf, &bodyf, e_token - 1); 365 if (qchar == '"') 366 *e_token++ = '\''; 367 } 368 stop_lit: 369 code = ident; 370 break; 371 372 case ('('): 373 case ('['): 374 unary_delim = true; 375 code = lparen; 376 break; 377 378 case (')'): 379 case (']'): 380 code = rparen; 381 break; 382 383 case '#': 384 unary_delim = ps.last_u_d; 385 code = preesc; 386 break; 387 388 case '?': 389 unary_delim = true; 390 code = question; 391 break; 392 393 case (':'): 394 code = colon; 395 unary_delim = true; 396 break; 397 398 case (';'): 399 unary_delim = true; 400 code = semicolon; 401 break; 402 403 case ('{'): 404 unary_delim = true; 405 406 /* 407 * if (ps.in_or_st) ps.block_init = 1; 408 */ 409 /* ? code = ps.block_init ? lparen : lbrace; */ 410 code = lbrace; 411 break; 412 413 case ('}'): 414 unary_delim = true; 415 /* ? code = ps.block_init ? rparen : rbrace; */ 416 code = rbrace; 417 break; 418 419 case 014: /* a form feed */ 420 unary_delim = ps.last_u_d; 421 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 422 * right */ 423 code = form_feed; 424 break; 425 426 case (','): 427 unary_delim = true; 428 code = comma; 429 break; 430 431 case '.': 432 unary_delim = false; 433 code = period; 434 break; 435 436 case '-': 437 case '+': /* check for -, +, --, ++ */ 438 code = (ps.last_u_d ? unary_op : binary_op); 439 unary_delim = true; 440 441 if (*buf_ptr == token[0]) { 442 /* check for doubled character */ 443 *e_token++ = *buf_ptr++; 444 /* buffer overflow will be checked at end of loop */ 445 if (last_code == ident || last_code == rparen) { 446 code = (ps.last_u_d ? unary_op : postop); 447 /* check for following ++ or -- */ 448 unary_delim = false; 449 } 450 } 451 else if (*buf_ptr == '=') 452 /* check for operator += */ 453 *e_token++ = *buf_ptr++; 454 else if (*buf_ptr == '>') { 455 /* check for operator -> */ 456 *e_token++ = *buf_ptr++; 457 if (!pointer_as_binop) { 458 unary_delim = false; 459 code = unary_op; 460 ps.want_blank = false; 461 } 462 } 463 break; /* buffer overflow will be checked at end of 464 * switch */ 465 466 case '=': 467 if (ps.in_or_st) 468 ps.block_init = 1; 469 #ifdef undef 470 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 471 e_token[-1] = *buf_ptr++; 472 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 473 *e_token++ = *buf_ptr++; 474 *e_token++ = '='; /* Flip =+ to += */ 475 *e_token = 0; 476 } 477 #else 478 if (*buf_ptr == '=') {/* == */ 479 *e_token++ = '='; /* Flip =+ to += */ 480 buf_ptr++; 481 *e_token = 0; 482 } 483 #endif 484 code = binary_op; 485 unary_delim = true; 486 break; 487 /* can drop thru!!! */ 488 489 case '>': 490 case '<': 491 case '!': /* ops like <, <<, <=, !=, etc */ 492 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 493 *e_token++ = *buf_ptr; 494 if (++buf_ptr >= buf_end) 495 fill_buffer(); 496 } 497 if (*buf_ptr == '=') 498 *e_token++ = *buf_ptr++; 499 code = (ps.last_u_d ? unary_op : binary_op); 500 unary_delim = true; 501 break; 502 503 default: 504 if (token[0] == '/' && *buf_ptr == '*') { 505 /* it is start of comment */ 506 *e_token++ = '*'; 507 508 if (++buf_ptr >= buf_end) 509 fill_buffer(); 510 511 code = comment; 512 unary_delim = ps.last_u_d; 513 break; 514 } 515 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 516 /* 517 * handle ||, &&, etc, and also things as in int *****i 518 */ 519 *e_token++ = *buf_ptr; 520 if (++buf_ptr >= buf_end) 521 fill_buffer(); 522 } 523 code = (ps.last_u_d ? unary_op : binary_op); 524 unary_delim = true; 525 526 527 } /* end of switch */ 528 if (code != newline) { 529 l_struct = false; 530 last_code = code; 531 } 532 if (buf_ptr >= buf_end) /* check for input buffer empty */ 533 fill_buffer(); 534 ps.last_u_d = unary_delim; 535 *e_token = '\0'; /* null terminate the token */ 536 return (code); 537 } 538 539 /* 540 * Add the given keyword to the keyword table, using val as the keyword type 541 */ 542 addkey(key, val) 543 char *key; 544 { 545 register struct templ *p = specials; 546 while (p->rwd) 547 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 548 return; 549 else 550 p++; 551 if (p >= specials + sizeof specials / sizeof specials[0]) 552 return; /* For now, table overflows are silently 553 * ignored */ 554 p->rwd = key; 555 p->rwcode = val; 556 p[1].rwd = 0; 557 p[1].rwcode = 0; 558 return; 559 } 560