1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1986-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * * 19 ***********************************************************************/ 20 #pragma prototyped 21 /* 22 * Glenn Fowler 23 * AT&T Research 24 * 25 * preprocessor and proto lexical analyzer fsm 26 * define PROTOMAIN for standalone proto 27 */ 28 29 #include "pplib.h" 30 #include "ppfsm.h" 31 32 /* 33 * lexical FSM encoding 34 * derived from a standalone ansi cpp by Dennis Ritchie 35 * modified for libpp by Glenn Fowler 36 * 37 * fsm[] is initialized from fsminit[]. The encoding is blown out into 38 * fsm[] for time efficiency. When in state state, and one of the 39 * characters in ch arrives, enter nextstate. States >= TERMINAL are 40 * either final, or at least require special action. In fsminit[] there 41 * is a line for each <state,charset,nextstate>. Early entries are 42 * overwritten by later ones. C_XXX is the universal set and should 43 * always be first. Some of the fsminit[] entries are templates for 44 * groups of states. The OP entries trigger the state copies. States 45 * above TERMINAL are represented in fsm[] as negative values. S_TOK and 46 * S_TOKB encode the resulting token type in the upper bits. These actions 47 * differ in that S_TOKB has a lookahead char. 48 * 49 * fsm[] has three start states: 50 * 51 * PROTO proto (ANSI -> K&R,C++,ANSI) 52 * QUICK standalone ppcpp() 53 * TOKEN tokenizing pplex() 54 * 55 * If the next state remains the same then the fsm[] transition value is 0. 56 * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses 57 * fsm[state+1][0] which is ~S_EOB for all states. This preserves the 58 * power of 2 fsm[] row size for efficient array indexing. Thanks to 59 * D. G. Korn for the last two observations. The pseudo non-terminal state 60 * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF. 61 * 62 * The bit layout is: 63 * 64 * TERM arg SPLICE next 65 * 15 14-8 7 6-0 66 */ 67 68 /* 69 * NOTE: these must be `control' characters for all native codesets 70 * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3} 71 */ 72 73 #define C_DEC 001 74 #define C_EOF 002 75 #define C_HEX 003 76 #define C_LET 021 77 #define C_OCT 022 78 #define C_XXX 023 79 80 #define OP (-1) 81 #define END 0 82 #define COPY 1 83 84 #define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1]) 85 86 struct fsminit /* fsm initialization row */ 87 { 88 int state; /* if in this state */ 89 unsigned char ch[4]; /* and see one of these */ 90 int nextstate; /* enter this state if <TERMINAL*/ 91 }; 92 93 static struct fsminit fsminit[] = 94 { 95 /* proto start state */ 96 { PROTO, { C_XXX }, S_CHR, }, 97 { PROTO, { C_EOF }, S_EOF, }, 98 { PROTO, { C_DEC }, BAD1, }, 99 { PROTO, { '.' }, DOT, }, 100 { PROTO, { C_LET }, NID, }, 101 { PROTO, { 'L' }, LIT, }, 102 { PROTO, { 'd', 'e', 'f', 'i' }, RES1, }, 103 { PROTO, { 'r', 's', 't', 'v' }, RES1, }, 104 { PROTO, { 'w', 'N' }, RES1, }, 105 { PROTO, { '"', '\'' }, S_LITBEG, }, 106 { PROTO, { '/' }, COM1, }, 107 { PROTO, { '\n' }, S_NL, }, 108 { PROTO, { ' ','\t','\f','\v' }, WS1, }, 109 110 /* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */ 111 { RES1, { C_XXX }, S_MACRO, }, 112 { RES1, { C_LET, C_DEC }, NID, }, 113 { RES1, { 'a' }, RES1a, }, 114 { RES1, { 'e' }, RES1e, }, 115 { RES1, { 'f' }, RES1f, }, 116 { RES1, { 'h' }, RES1h, }, 117 { RES1, { 'l' }, RES1l, }, 118 { RES1, { 'n' }, RES1n, }, 119 { RES1, { 'o' }, RES1o, }, 120 { RES1, { 't' }, RES1t, }, 121 { RES1, { 'x' }, RES1x, }, 122 { RES1, { 'y' }, RES1y, }, 123 124 /* proto reserved {va_start} */ 125 { RES1a, { C_XXX }, S_RESERVED, }, 126 { RES1a, { C_LET, C_DEC }, NID, }, 127 { RES1a, { '_','s','t','a' }, RES1a, }, 128 { RES1a, { 'r' }, RES1a, }, 129 130 /* proto reserved {return} */ 131 { RES1e, { C_XXX }, S_RESERVED, }, 132 { RES1e, { C_LET, C_DEC }, NID, }, 133 { RES1e, { 't','u','r','n' }, RES1e, }, 134 135 /* proto reserved {if} */ 136 { RES1f, { C_XXX }, S_RESERVED, }, 137 { RES1f, { C_LET, C_DEC }, NID, }, 138 139 /* proto reserved {while} */ 140 { RES1h, { C_XXX }, S_RESERVED, }, 141 { RES1h, { C_LET, C_DEC }, NID, }, 142 { RES1h, { 'i','l','e' }, RES1h, }, 143 144 /* proto reserved {else} */ 145 { RES1l, { C_XXX }, S_RESERVED, }, 146 { RES1l, { C_LET, C_DEC }, NID, }, 147 { RES1l, { 's','e' }, RES1l, }, 148 149 /* proto reserved {inline} */ 150 { RES1n, { C_XXX }, S_RESERVED, }, 151 { RES1n, { C_LET, C_DEC }, NID, }, 152 { RES1n, { 'l','i','n','e' }, RES1n, }, 153 154 /* proto reserved {do,for,void} */ 155 { RES1o, { C_XXX }, S_RESERVED, }, 156 { RES1o, { C_LET, C_DEC }, NID, }, 157 { RES1o, { 'r','i','d','N' }, RES1o, }, 158 159 /* proto reserved {static} */ 160 { RES1t, { C_XXX }, S_RESERVED, }, 161 { RES1t, { C_LET, C_DEC }, NID, }, 162 { RES1t, { 'a','t','i','c' }, RES1t, }, 163 164 /* proto reserved {extern} */ 165 { RES1x, { C_XXX }, S_RESERVED, }, 166 { RES1x, { C_LET, C_DEC }, NID, }, 167 { RES1x, { 't','e','r','n' }, RES1x, }, 168 169 /* proto reserved {typedef} */ 170 { RES1y, { C_XXX }, S_RESERVED, }, 171 { RES1y, { C_LET, C_DEC }, NID, }, 172 { RES1y, { 'p','e','d','f' }, RES1y, }, 173 174 /* saw /, perhaps start of comment */ 175 { COM1, { C_XXX }, S_CHRB, }, 176 { COM1, { '*' }, COM2, }, 177 #if PROTOMAIN 178 { COM1, { '/' }, COM5, }, 179 #endif 180 181 /* saw / *, start of comment */ 182 { COM2, { C_XXX }, COM2, }, 183 { COM2, { '\n', C_EOF }, S_COMMENT, }, 184 { COM2, { '/' }, COM4, }, 185 { COM2, { '*' }, COM3, }, 186 { COM2, { '#', ';', ')' }, QUAL(COM2), }, 187 188 /* saw the * possibly ending a comment */ 189 { COM3, { C_XXX }, COM2, }, 190 { COM3, { '\n', C_EOF }, S_COMMENT, }, 191 { COM3, { '#', ';', ')' }, QUAL(COM2), }, 192 { COM3, { '*' }, COM3, }, 193 { COM3, { '/' }, S_COMMENT, }, 194 195 /* saw / in / * comment, possible malformed nest */ 196 { COM4, { C_XXX }, COM2, }, 197 { COM4, { '*', '\n', C_EOF }, S_COMMENT, }, 198 { COM4, { '/' }, COM4, }, 199 200 /* saw / /, start of comment */ 201 { COM5, { C_XXX }, COM5, }, 202 { COM5, { '\n', C_EOF }, S_COMMENT, }, 203 { COM5, { '/' }, COM6, }, 204 { COM5, { '*' }, COM7, }, 205 206 /* saw / in / / comment, possible malformed nest */ 207 { COM6, { C_XXX }, COM5, }, 208 { COM6, { '*', '\n', C_EOF }, S_COMMENT, }, 209 { COM6, { '/' }, COM6, }, 210 211 /* saw * in / /, possible malformed nest */ 212 { COM7, { C_XXX }, COM5, }, 213 { COM7, { '\n', C_EOF }, S_COMMENT, }, 214 { COM7, { '*' }, COM7, }, 215 { COM7, { '/' }, S_COMMENT, }, 216 217 /* normal identifier -- always a macro candidate */ 218 { NID, { C_XXX }, S_MACRO, }, 219 { NID, { C_LET, C_DEC }, NID, }, 220 221 /* saw ., operator or dbl constant */ 222 { DOT, { C_XXX }, S_CHRB, }, 223 { DOT, { '.' }, DOT2, }, 224 { DOT, { C_DEC }, BAD1, }, 225 226 /* saw .., possible ... */ 227 { DOT2, { C_XXX }, BACK(T_INVALID), }, 228 { DOT2, { '.' }, KEEP(T_VARIADIC), }, 229 230 /* saw L (possible start of normal wide literal) */ 231 { LIT, { C_XXX }, S_MACRO, }, 232 { LIT, { C_LET, C_DEC }, NID, }, 233 { LIT, { '"', '\'' }, QUAL(LIT1), }, 234 235 /* saw " or ' beginning literal */ 236 { LIT1, { C_XXX }, LIT1, }, 237 { LIT1, { '"', '\'' }, S_LITEND, }, 238 { LIT1, { '\n', C_EOF }, S_LITEND, }, 239 { LIT1, { '\\' }, LIT2, }, 240 241 /* saw \ in literal */ 242 { LIT2, { C_XXX }, S_LITESC, }, 243 { LIT2, { '\n', C_EOF }, S_LITEND, }, 244 245 /* eat malformed numeric constant */ 246 { BAD1, { C_XXX }, BACK(T_INVALID), }, 247 { BAD1, { C_LET, C_DEC, '.' }, BAD1, }, 248 { BAD1, { 'e', 'E' }, BAD2, }, 249 250 /* eat malformed numeric fraction|exponent */ 251 { BAD2, { C_XXX }, BACK(T_INVALID), }, 252 { BAD2, { C_LET, C_DEC, '.' }, BAD1, }, 253 { BAD2, { '+', '-' }, BAD1, }, 254 255 /* saw white space, eat it up */ 256 { WS1, { C_XXX }, S_WS, }, 257 { WS1, { ' ', '\t' }, WS1, }, 258 { WS1, { '\f', '\v' }, S_VS, }, 259 260 #if !PROTOMAIN 261 262 /* quick template */ 263 { QUICK, { C_XXX }, QTOK, }, 264 { QUICK, { C_EOF, MARK }, S_CHRB, }, 265 { QUICK, { C_LET, C_DEC }, QID, }, 266 { QUICK, { 'L' }, LIT0, }, 267 { QUICK, { '"', '\'' }, S_LITBEG, }, 268 { QUICK, { '/' }, S_CHRB, }, 269 { QUICK, { '*' }, QCOM, }, 270 { QUICK, { '#' }, SHARP1, }, 271 { QUICK, { '\n' }, S_NL, }, 272 { QUICK, { '\f', '\v' }, S_VS, }, 273 274 /* copy QUICK to QUICK+1 through MAC0+1 */ 275 { OP, {QUICK,QUICK+1,MAC0+1}, COPY, }, 276 277 /* quick start state */ 278 { QUICK, { C_EOF }, S_EOF, }, 279 { QUICK, { C_DEC }, QNUM, }, 280 { QUICK, { MARK }, QTOK, }, 281 { QUICK, { '/' }, COM1, }, 282 { QUICK, { ' ', '\t' }, QUICK, }, 283 284 /* grab non-macro tokens */ 285 { QTOK, { C_DEC }, QNUM, }, 286 287 /* grab numeric and invalid tokens */ 288 { QNUM, { C_LET, C_DEC, '.' }, QNUM, }, 289 { QNUM, { 'e', 'E' }, QEXP, }, 290 291 /* grab exponent token */ 292 { QEXP, { C_LET, C_DEC, '.' }, QNUM, }, 293 { QEXP, { '+', '-' }, QNUM, }, 294 295 /* saw *, grab possible bad comment terminator */ 296 { QCOM, { C_DEC }, QNUM, }, 297 { QCOM, { '/' }, S_COMMENT, }, 298 299 /* saw L (possible start of wide string or first macro char) */ 300 { MAC0, { 'L' }, QID, }, 301 { MAC0, { '"', '\'' }, QUAL(LIT1), }, 302 303 /* macro candidate template */ 304 { MAC0+1, { 'L' }, QID, }, 305 306 /* copy MAC0+1 to MAC0+2 through MACN */ 307 { OP, {MAC0+1,MAC0+2,MACN}, COPY }, 308 309 /* saw L (possible start of wide string or macro L) */ 310 { HIT0, { C_XXX }, S_MACRO, }, 311 { HIT0, { C_LET, C_DEC }, QID, }, 312 { HIT0, { '"', '\'' }, QUAL(LIT1), }, 313 314 /* macro hit template */ 315 { HIT0+1, { C_XXX }, S_MACRO, }, 316 { HIT0+1, { C_LET, C_DEC }, QID, }, 317 318 /* copy HIT0+1 to HIT0+2 through HITN */ 319 { OP, {HIT0+1,HIT0+2,HITN}, COPY }, 320 321 /* saw L (possible start of wide literal) */ 322 { LIT0, { C_XXX }, S_MACRO, }, 323 { LIT0, { C_LET, C_DEC }, QID, }, 324 { LIT0, { '"', '\'' }, QUAL(LIT1), }, 325 326 /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */ 327 { COM1, { '=' }, KEEP(T_DIVEQ), }, 328 329 /* normal start state */ 330 { TOKEN, { C_XXX }, S_HUH, }, 331 { TOKEN, { C_EOF }, S_EOF, }, 332 { TOKEN, { C_DEC }, DEC1, }, 333 { TOKEN, { '0' }, OCT1, }, 334 { TOKEN, { '.' }, DOT1, }, 335 { TOKEN, { C_LET }, NID, }, 336 { TOKEN, { 'L' }, LIT, }, 337 { TOKEN, { '"', '\'', '<' }, S_LITBEG, }, 338 { TOKEN, { '/' }, COM1, }, 339 { TOKEN, { '\n' }, S_NL, }, 340 { TOKEN, { ' ', '\t' }, WS1, }, 341 { TOKEN, { '\f', '\v' }, S_VS, }, 342 { TOKEN, { '#' }, SHARP1, }, 343 { TOKEN, { ':' }, COLON1, }, 344 { TOKEN, { '%' }, PCT1, }, 345 { TOKEN, { '&' }, AND1, }, 346 { TOKEN, { '*' }, STAR1, }, 347 { TOKEN, { '+' }, PLUS1, }, 348 { TOKEN, { '-' }, MINUS1, }, 349 { TOKEN, { '=' }, EQ1, }, 350 { TOKEN, { '!' }, NOT1, }, 351 { TOKEN, { '>' }, GT1, }, 352 { TOKEN, { '^' }, CIRC1, }, 353 { TOKEN, { '|' }, OR1, }, 354 { TOKEN, { '(', ')', '[', ']' }, S_CHR, }, 355 { TOKEN, { '{', '}', ',', ';' }, S_CHR, }, 356 { TOKEN, { '~', '?' }, S_CHR, }, 357 358 /* saw 0, possible oct|hex|dec|dbl constant */ 359 { OCT1, { C_XXX }, BACK(T_DECIMAL), }, 360 { OCT1, { C_LET, C_DEC }, BAD1, }, 361 { OCT1, { C_OCT }, OCT2, }, 362 { OCT1, { 'e', 'E' }, DBL2, }, 363 { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 364 { OCT1, { 'x', 'X' }, HEX1, }, 365 { OCT1, { '.' }, DBL1, }, 366 367 /* saw 0<oct>, oct constant */ 368 { OCT2, { C_XXX }, BACK(T_OCTAL), }, 369 { OCT2, { C_LET, C_DEC }, BAD1, }, 370 { OCT2, { C_OCT }, OCT2, }, 371 { OCT2, { 'e', 'E' }, DBL2, }, 372 { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), }, 373 { OCT2, { '.' }, DBL1, }, 374 375 /* oct constant qualifier */ 376 { OCT3, { C_XXX }, BACK(T_OCTAL), }, 377 { OCT3, { C_LET, C_DEC, '.' }, BAD1, }, 378 { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), }, 379 380 /* saw 0 [xX], hex constant */ 381 { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), }, 382 { HEX1, { C_LET }, BAD1, }, 383 { HEX1, { C_HEX }, HEX1, }, 384 { HEX1, { 'e', 'E' }, HEX3, }, 385 { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 386 { HEX1, { '.' }, HEX4, }, 387 { HEX1, { 'p', 'P' }, HEX5, }, 388 389 /* hex constant qualifier */ 390 { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), }, 391 { HEX2, { C_LET, C_DEC, '.' }, BAD1, }, 392 { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 393 394 /* hex [eE][-+] botch */ 395 { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), }, 396 { HEX3, { C_LET, '.', '-', '+'},BAD1, }, 397 { HEX3, { C_HEX }, HEX1, }, 398 { HEX3, { 'e', 'E' }, HEX3, }, 399 { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 400 401 /* hex dbl fraction */ 402 { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), }, 403 { HEX4, { C_LET, '.' }, BAD1, }, 404 { HEX4, { C_HEX }, HEX4, }, 405 { HEX4, { 'p', 'P' }, HEX5, }, 406 { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 407 408 /* optional hex dbl exponent sign */ 409 { HEX5, { C_XXX }, BACK(T_INVALID), }, 410 { HEX5, { C_LET, '.' }, BAD1, }, 411 { HEX5, { '+', '-' }, HEX6, }, 412 { HEX5, { C_DEC }, HEX7, }, 413 414 /* mandatory hex dbl exponent first digit */ 415 { HEX6, { C_XXX }, BACK(T_INVALID), }, 416 { HEX6, { C_LET, '.' }, BAD1, }, 417 { HEX6, { C_DEC }, HEX7, }, 418 419 /* hex dbl exponent digits */ 420 { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), }, 421 { HEX7, { C_LET, '.' }, BAD1, }, 422 { HEX7, { C_DEC }, HEX7, }, 423 { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 424 425 /* hex dbl constant qualifier */ 426 { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), }, 427 { HEX8, { C_LET, '.' }, BAD1, }, 428 { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 429 430 /* saw <dec>, dec constant */ 431 { DEC1, { C_XXX }, BACK(T_DECIMAL), }, 432 { DEC1, { C_LET }, BAD1, }, 433 { DEC1, { C_DEC }, DEC1, }, 434 { DEC1, { 'e', 'E' }, DBL2, }, 435 { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 436 { DEC1, { '.' }, DBL1, }, 437 438 /* dec constant qualifier */ 439 { DEC2, { C_XXX }, BACK(T_DECIMAL), }, 440 { DEC2, { C_LET, C_DEC }, BAD1, }, 441 { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 442 443 /* saw ., operator or dbl constant */ 444 { DOT1, { C_XXX }, S_CHRB, }, 445 { DOT1, { '.' }, DOT2, }, 446 { DOT1, { C_DEC }, DBL1, }, 447 448 /* dbl fraction */ 449 { DBL1, { C_XXX }, BACK(T_DOUBLE), }, 450 { DBL1, { C_LET, '.' }, BAD1, }, 451 { DBL1, { C_DEC }, DBL1, }, 452 { DBL1, { 'e', 'E' }, DBL2, }, 453 { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 454 455 /* optional dbl exponent sign */ 456 { DBL2, { C_XXX }, BACK(T_INVALID), }, 457 { DBL2, { C_LET, '.' }, BAD1, }, 458 { DBL2, { '+', '-' }, DBL3, }, 459 { DBL2, { C_DEC }, DBL4, }, 460 461 /* mandatory dbl exponent first digit */ 462 { DBL3, { C_XXX }, BACK(T_INVALID), }, 463 { DBL3, { C_LET, '.' }, BAD1, }, 464 { DBL3, { C_DEC }, DBL4, }, 465 466 /* dbl exponent digits */ 467 { DBL4, { C_XXX }, BACK(T_DOUBLE), }, 468 { DBL4, { C_LET, '.' }, BAD1, }, 469 { DBL4, { C_DEC }, DBL4, }, 470 { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 471 472 /* dbl constant qualifier */ 473 { DBL5, { C_XXX }, BACK(T_DOUBLE), }, 474 { DBL5, { C_LET, '.' }, BAD1, }, 475 { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 476 477 /* saw < starting include header */ 478 { HDR1, { C_XXX }, HDR1, }, 479 { HDR1, { '>', '\n', C_EOF }, S_LITEND, }, 480 481 /* saw <binop><space> expecting = */ 482 { BIN1, { C_XXX }, S_HUH, }, 483 { BIN1, { ' ', '\t' }, BIN1, }, 484 485 /* 2-char ops */ 486 487 { SHARP1, { C_XXX }, S_SHARP, }, 488 489 { PCT1, { C_XXX }, S_CHRB, }, 490 { PCT1, { '=' }, KEEP(T_MODEQ), }, 491 492 { AND1, { C_XXX }, S_CHRB, }, 493 { AND1, { '=' }, KEEP(T_ANDEQ), }, 494 { AND1, { '&' }, KEEP(T_ANDAND), }, 495 496 { STAR1, { C_XXX }, S_CHRB, }, 497 { STAR1, { '=' }, KEEP(T_MPYEQ), }, 498 { STAR1, { '/' }, S_COMMENT, }, 499 500 { PLUS1, { C_XXX }, S_CHRB, }, 501 { PLUS1, { '=' }, KEEP(T_ADDEQ), }, 502 { PLUS1, { '+' }, KEEP(T_ADDADD), }, 503 504 { MINUS1, { C_XXX }, S_CHRB, }, 505 { MINUS1, { '=' }, KEEP(T_SUBEQ), }, 506 { MINUS1, { '-' }, KEEP(T_SUBSUB), }, 507 { MINUS1, { '>' }, KEEP(T_PTRMEM), }, 508 509 { COLON1, { C_XXX }, S_CHRB, }, 510 { COLON1, { '=', '>' }, S_HUH, }, 511 512 { LT1, { C_XXX }, S_CHRB, }, 513 { LT1, { '=' }, KEEP(T_LE), }, 514 { LT1, { '<' }, LSH1, }, 515 516 { EQ1, { C_XXX }, S_CHRB, }, 517 { EQ1, { '=' }, KEEP(T_EQ), }, 518 519 { NOT1, { C_XXX }, S_CHRB, }, 520 { NOT1, { '=' }, KEEP(T_NE), }, 521 522 { GT1, { C_XXX }, S_CHRB, }, 523 { GT1, { '=' }, KEEP(T_GE), }, 524 { GT1, { '>' }, RSH1, }, 525 526 { CIRC1, { C_XXX }, S_CHRB, }, 527 { CIRC1, { '=' }, KEEP(T_XOREQ), }, 528 529 { OR1, { C_XXX }, S_CHRB, }, 530 { OR1, { '=' }, KEEP(T_OREQ), }, 531 { OR1, { '|' }, KEEP(T_OROR), }, 532 533 /* 3-char ops */ 534 535 { ARROW1, { C_XXX }, BACK(T_PTRMEM), }, 536 { ARROW1, { '*' }, KEEP(T_PTRMEMREF), }, 537 538 { LSH1, { C_XXX }, BACK(T_LSHIFT), }, 539 { LSH1, { '=' }, KEEP(T_LSHIFTEQ), }, 540 541 { RSH1, { C_XXX }, BACK(T_RSHIFT), }, 542 { RSH1, { '=' }, KEEP(T_RSHIFTEQ), }, 543 544 #endif 545 546 /* end */ 547 { OP, { 0 }, END, } 548 }; 549 550 short fsm[TERMINAL+1][MAX+1]; 551 552 char trigraph[MAX+1]; 553 554 #if PROTOMAIN 555 static char spl[] = { '\\', '\r', 0 }; 556 static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@"; 557 #else 558 static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 }; 559 static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; 560 #endif 561 static char* let = &aln[10]; 562 static char hex[] = "fedcbaFEDCBA9876543210"; 563 static char* dec = &hex[12]; 564 static char* oct = &hex[14]; 565 566 /* 567 * runtime FSM modifications 568 * ppfsm(FSM_INIT,0) must be called first 569 */ 570 571 void 572 ppfsm(int op, register char* s) 573 { 574 register int c; 575 register int n; 576 register int i; 577 register short* rp; 578 register struct fsminit* fp; 579 #if !PROTOMAIN 580 char* t; 581 int x; 582 #endif 583 584 switch (op) 585 { 586 587 #if !PROTOMAIN 588 589 case FSM_IDADD: 590 while (c = *s++) 591 if (!ppisid(c)) 592 { 593 if (fsm[TOKEN][c] == ~S_HUH) 594 { 595 setid(c); 596 for (i = 0; i < TERMINAL; i++) 597 fsm[i][c] = IDSTATE(fsm[i]['_']); 598 } 599 else error(2, "%c: cannot add to identifier set", c); 600 } 601 break; 602 603 case FSM_IDDEL: 604 while (c = *s++) 605 if (ppisid(c)) 606 { 607 clrid(c); 608 for (i = 0; i < TERMINAL; i++) 609 fsm[i][c] = ~S_HUH; 610 } 611 break; 612 613 #endif 614 615 case FSM_INIT: 616 for (fp = fsminit;; fp++) 617 { 618 if ((n = fp->nextstate) >= TERMINAL) n = ~n; 619 if (fp->state == OP) 620 { 621 #if !PROTOMAIN 622 switch (n) 623 { 624 case COPY: 625 c = fp->ch[0]; 626 n = fp->ch[2]; 627 for (i = fp->ch[1]; i <= n; i++) 628 copy(i, c); 629 continue; 630 default: 631 break; 632 } 633 #endif 634 break; 635 } 636 rp = fsm[fp->state]; 637 for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++) 638 { 639 switch (c) 640 { 641 case C_XXX: 642 for (c = 0; c <= MAX; c++) 643 rp[c] = n; 644 /*FALLTHROUGH*/ 645 646 case C_EOF: 647 fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n; 648 continue; 649 650 case C_LET: 651 s = let; 652 break; 653 654 case C_HEX: 655 s = hex; 656 break; 657 658 case C_DEC: 659 s = dec; 660 break; 661 662 case C_OCT: 663 s = oct; 664 break; 665 666 default: 667 rp[c] = n; 668 continue; 669 } 670 while (c = *s++) 671 rp[c] = n; 672 } 673 } 674 675 /* 676 * install splice special cases 677 * and same non-terminal transitions 678 */ 679 680 for (i = 0; i < TERMINAL; i++) 681 { 682 rp = fsm[i]; 683 s = spl; 684 while (c = *s++) 685 if (c != MARK || !INCOMMENT(rp)) 686 { 687 if (rp[c] >= 0) rp[c] = ~rp[c]; 688 rp[c] &= ~SPLICE; 689 } 690 rp[EOB] = ~S_EOB; 691 for (c = 0; c <= MAX; c++) 692 if (rp[c] == i) 693 rp[c] = 0; 694 } 695 fsm[TERMINAL][0] = ~S_EOB; 696 697 #if !PROTOMAIN 698 699 /* 700 * default character types 701 */ 702 703 s = let; 704 while (c = *s++) 705 setid(c); 706 s = dec; 707 while (c = *s++) 708 setdig(c); 709 s = spl; 710 do setsplice(c = *s++); while (c); 711 712 /* 713 * trigraph map 714 */ 715 716 trigraph['='] = '#'; 717 trigraph['('] = '['; 718 trigraph['/'] = '\\'; 719 trigraph[')'] = ']'; 720 trigraph['\''] = '^'; 721 trigraph['<'] = '{'; 722 trigraph['!'] = '|'; 723 trigraph['>'] = '}'; 724 trigraph['-'] = '~'; 725 #endif 726 break; 727 728 #if !PROTOMAIN 729 730 case FSM_PLUSPLUS: 731 if (pp.option & PLUSPLUS) 732 { 733 fsm[COLON1][':'] = ~KEEP(T_SCOPE); 734 fsm[DOT1]['*'] = ~KEEP(T_DOTREF); 735 fsm[MINUS1]['>'] = ARROW1; 736 fsm[COM1]['/'] = COM5; 737 t = "%<:"; 738 for (i = 0; i < TERMINAL; i++) 739 { 740 rp = fsm[i]; 741 if (!INCOMMENT(rp) && !INQUOTE(rp)) 742 { 743 s = t; 744 while (c = *s++) 745 { 746 if (rp[c] > 0) rp[c] = ~rp[c]; 747 else if (!rp[c]) rp[c] = ~i; 748 rp[c] &= ~SPLICE; 749 } 750 } 751 } 752 s = t; 753 while (c = *s++) setsplice(c); 754 } 755 else 756 { 757 fsm[COLON1][':'] = ~S_CHRB; 758 fsm[DOT1]['*'] = ~S_CHRB; 759 fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM); 760 fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB; 761 } 762 break; 763 764 #if COMPATIBLE 765 766 case FSM_COMPATIBILITY: 767 if (pp.state & COMPATIBILITY) 768 { 769 fsm[HEX1]['e'] = HEX1; 770 fsm[HEX1]['E'] = HEX1; 771 fsm[QNUM]['e'] = QNUM; 772 fsm[QNUM]['E'] = QNUM; 773 fsm[QNUM]['u'] = ~QUAL(QNUM); 774 fsm[QNUM]['U'] = ~QUAL(QNUM); 775 } 776 else 777 { 778 fsm[HEX1]['e'] = HEX3; 779 fsm[HEX1]['E'] = HEX3; 780 fsm[QNUM]['e'] = QEXP; 781 fsm[QNUM]['E'] = QEXP; 782 fsm[QNUM]['u'] = QNUM; 783 fsm[QNUM]['U'] = QNUM; 784 } 785 break; 786 787 #endif 788 789 case FSM_QUOTADD: 790 while (c = *s++) 791 if (fsm[TOKEN][c] == ~S_HUH) 792 for (i = 0; i < TERMINAL; i++) 793 fsm[i][c] = fsm[i]['"']; 794 else error(2, "%c: cannot add to quote set", c); 795 break; 796 797 case FSM_QUOTDEL: 798 while (c = *s++) 799 if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"']) 800 for (i = 0; i < TERMINAL; i++) 801 fsm[i][c] = fsm[i]['_']; 802 break; 803 804 case FSM_OPSPACE: 805 n = s ? BIN1 : ~S_CHRB; 806 fsm[COM1][' '] = fsm[COM1]['\t'] = n; 807 fsm[AND1][' '] = fsm[AND1]['\t'] = n; 808 fsm[STAR1][' '] = fsm[STAR1]['\t'] = n; 809 fsm[PCT1][' '] = fsm[PCT1]['\t'] = n; 810 fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n; 811 fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n; 812 fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n; 813 fsm[OR1][' '] = fsm[OR1]['\t'] = n; 814 fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT); 815 fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT); 816 break; 817 818 case FSM_MACRO: 819 if (pp.truncate && strlen(s) >= pp.truncate) 820 { 821 x = s[pp.truncate]; 822 s[pp.truncate] = 0; 823 } 824 else x = -1; 825 i = MAC0 + ((c = *s++) != 'L'); 826 if ((n = fsm[QUICK][c]) != (i + NMAC)) 827 { 828 n = i; 829 if (!*s) n += NMAC; 830 } 831 if (fsm[QUICK][c] != n) 832 fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n; 833 if (c = *s++) 834 { 835 for (;;) 836 { 837 if ((i = n) < HIT0) 838 { 839 if (n < MACN) n++; 840 if (!*s) 841 { 842 n += NMAC; 843 break; 844 } 845 if (fsm[i][c] < HIT0) 846 fsm[i][c] = n; 847 if (fsm[i + NMAC][c] < HIT0) 848 fsm[i + NMAC][c] = n; 849 } 850 else 851 { 852 if (n < HITN) n++; 853 if (!*s) break; 854 if (fsm[i][c] < HIT0) 855 { 856 n -= NMAC; 857 fsm[i][c] = n; 858 } 859 } 860 c = *s++; 861 } 862 if (x >= 0) 863 { 864 *s = x; 865 for (n = CHAR_MIN; n <= CHAR_MAX; n++) 866 if (ppisidig(n)) 867 fsm[HITN][n] = HITN; 868 n = HITN; 869 } 870 if (fsm[i][c] < n) 871 fsm[i][c] = n; 872 if (i < HIT0 && fsm[i + NMAC][c] < n) 873 fsm[i + NMAC][c] = n; 874 } 875 break; 876 877 #endif 878 879 } 880 } 881 882 #if !PROTOMAIN 883 884 /* 885 * file buffer refill 886 * c is current input char 887 */ 888 889 void 890 refill(register int c) 891 { 892 if (pp.in->flags & IN_eof) 893 { 894 pp.in->nextchr--; 895 c = 0; 896 } 897 else 898 { 899 *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c; 900 c = 901 #if PROTOTYPE 902 (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) : 903 #endif 904 read(pp.in->fd, pp.in->nextchr, PPBUFSIZ); 905 } 906 if (c > 0) 907 { 908 if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline; 909 else pp.in->flags &= ~IN_newline; 910 #if PROTOTYPE 911 if (!(pp.in->flags & IN_prototype)) 912 #endif 913 if (c < PPBUFSIZ && (pp.in->flags & IN_regular)) 914 { 915 pp.in->flags |= IN_eof; 916 close(pp.in->fd); 917 pp.in->fd = -1; 918 } 919 } 920 else 921 { 922 if (c < 0) 923 { 924 error(ERROR_SYSTEM|3, "read error"); 925 c = 0; 926 } 927 else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c) 928 { 929 static char ket[] = { 0, '}', '\n', 0 }; 930 931 pp.in->flags ^= IN_c; 932 pp.in->nextchr = ket + 1; 933 c = 2; 934 } 935 pp.in->flags |= IN_eof; 936 } 937 #if CHECKPOINT 938 pp.in->buflen = c; 939 #endif 940 pp.in->nextchr[c] = 0; 941 debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : "")); 942 if (pp.test & 0x0080) 943 sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file); 944 } 945 946 #endif 947