1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1986-2011 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22 * Glenn Fowler
23 * AT&T Research
24 *
25 * preprocessor and proto lexical analyzer fsm
26 * define PROTOMAIN for standalone proto
27 */
28
29 #include "pplib.h"
30 #include "ppfsm.h"
31
32 /*
33 * lexical FSM encoding
34 * derived from a standalone ansi cpp by Dennis Ritchie
35 * modified for libpp by Glenn Fowler
36 *
37 * fsm[] is initialized from fsminit[]. The encoding is blown out into
38 * fsm[] for time efficiency. When in state state, and one of the
39 * characters in ch arrives, enter nextstate. States >= TERMINAL are
40 * either final, or at least require special action. In fsminit[] there
41 * is a line for each <state,charset,nextstate>. Early entries are
42 * overwritten by later ones. C_XXX is the universal set and should
43 * always be first. Some of the fsminit[] entries are templates for
44 * groups of states. The OP entries trigger the state copies. States
45 * above TERMINAL are represented in fsm[] as negative values. S_TOK and
46 * S_TOKB encode the resulting token type in the upper bits. These actions
47 * differ in that S_TOKB has a lookahead char.
48 *
49 * fsm[] has three start states:
50 *
51 * PROTO proto (ANSI -> K&R,C++,ANSI)
52 * QUICK standalone ppcpp()
53 * TOKEN tokenizing pplex()
54 *
55 * If the next state remains the same then the fsm[] transition value is 0.
56 * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
57 * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
58 * power of 2 fsm[] row size for efficient array indexing. Thanks to
59 * D. G. Korn for the last two observations. The pseudo non-terminal state
60 * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
61 *
62 * The bit layout is:
63 *
64 * TERM arg SPLICE next
65 * 15 14-8 7 6-0
66 */
67
68 /*
69 * NOTE: these must be `control' characters for all native codesets
70 * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
71 */
72
/*
 * character class codes usable in fsminit[].ch
 * ppfsm(FSM_INIT) expands each code into a set of characters
 */

#define C_DEC		001	/* the digits in dec[]			*/
#define C_EOF		002	/* end of file pseudo character		*/
#define C_HEX		003	/* the digits in hex[]			*/
#define C_LET		021	/* the identifier letters in let[]	*/
#define C_OCT		022	/* the digits in oct[]			*/
#define C_XXX		023	/* universal set: every char 0..MAX	*/

/*
 * an fsminit[] row whose state is OP is a table operation
 * with the operation code (END or COPY) in nextstate
 */

#define OP		(-1)
#define END		0
#define COPY		1

/*
 * copy the transitions of state f onto state t
 * along with the matching fsm[TERMINAL][state+1] slot
 */

#define copy(t,f) \
	(memcpy(&fsm[(t)][1], &fsm[(f)][1], (MAX+1)*sizeof(short)), \
	 fsm[TERMINAL][(t)+1] = fsm[TERMINAL][(f)+1])
85
/*
 * one row of the fsminit[] table: a <state,charset,nextstate> transition
 * or, when state is OP, a table operation whose code is in nextstate
 * ch holds up to 4 characters or C_* class codes; a 0 entry ends the list
 */

struct fsminit				/* fsm initialization row	*/
{
	int		state;		/* if in this state		*/
	unsigned char	ch[4];		/* and see one of these		*/
	int		nextstate;	/* enter this state if <TERMINAL*/
};
92
93 static struct fsminit fsminit[] =
94 {
95 /* proto start state */
96 { PROTO, { C_XXX }, S_CHR, },
97 { PROTO, { C_EOF }, S_EOF, },
98 { PROTO, { C_DEC }, BAD1, },
99 { PROTO, { '.' }, DOT, },
100 { PROTO, { C_LET }, NID, },
101 { PROTO, { 'L' }, LIT, },
102 { PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
103 { PROTO, { 'r', 's', 't', 'v' }, RES1, },
104 { PROTO, { 'w', 'N' }, RES1, },
105 { PROTO, { '"', '\'' }, S_LITBEG, },
106 { PROTO, { '/' }, COM1, },
107 { PROTO, { '\n' }, S_NL, },
108 { PROTO, { ' ','\t','\f','\v' }, WS1, },
109
110 /* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
111 { RES1, { C_XXX }, S_MACRO, },
112 { RES1, { C_LET, C_DEC }, NID, },
113 { RES1, { 'a' }, RES1a, },
114 { RES1, { 'e' }, RES1e, },
115 { RES1, { 'f' }, RES1f, },
116 { RES1, { 'h' }, RES1h, },
117 { RES1, { 'l' }, RES1l, },
118 { RES1, { 'n' }, RES1n, },
119 { RES1, { 'o' }, RES1o, },
120 { RES1, { 't' }, RES1t, },
121 { RES1, { 'x' }, RES1x, },
122 { RES1, { 'y' }, RES1y, },
123
124 /* proto reserved {va_start} */
125 { RES1a, { C_XXX }, S_RESERVED, },
126 { RES1a, { C_LET, C_DEC }, NID, },
127 { RES1a, { '_','s','t','a' }, RES1a, },
128 { RES1a, { 'r' }, RES1a, },
129
130 /* proto reserved {return} */
131 { RES1e, { C_XXX }, S_RESERVED, },
132 { RES1e, { C_LET, C_DEC }, NID, },
133 { RES1e, { 't','u','r','n' }, RES1e, },
134
135 /* proto reserved {if} */
136 { RES1f, { C_XXX }, S_RESERVED, },
137 { RES1f, { C_LET, C_DEC }, NID, },
138
139 /* proto reserved {while} */
140 { RES1h, { C_XXX }, S_RESERVED, },
141 { RES1h, { C_LET, C_DEC }, NID, },
142 { RES1h, { 'i','l','e' }, RES1h, },
143
144 /* proto reserved {else} */
145 { RES1l, { C_XXX }, S_RESERVED, },
146 { RES1l, { C_LET, C_DEC }, NID, },
147 { RES1l, { 's','e' }, RES1l, },
148
149 /* proto reserved {inline} */
150 { RES1n, { C_XXX }, S_RESERVED, },
151 { RES1n, { C_LET, C_DEC }, NID, },
152 { RES1n, { 'l','i','n','e' }, RES1n, },
153
154 /* proto reserved {do,for,void} */
155 { RES1o, { C_XXX }, S_RESERVED, },
156 { RES1o, { C_LET, C_DEC }, NID, },
157 { RES1o, { 'r','i','d','N' }, RES1o, },
158
159 /* proto reserved {static} */
160 { RES1t, { C_XXX }, S_RESERVED, },
161 { RES1t, { C_LET, C_DEC }, NID, },
162 { RES1t, { 'a','t','i','c' }, RES1t, },
163
164 /* proto reserved {extern} */
165 { RES1x, { C_XXX }, S_RESERVED, },
166 { RES1x, { C_LET, C_DEC }, NID, },
167 { RES1x, { 't','e','r','n' }, RES1x, },
168
169 /* proto reserved {typedef} */
170 { RES1y, { C_XXX }, S_RESERVED, },
171 { RES1y, { C_LET, C_DEC }, NID, },
172 { RES1y, { 'p','e','d','f' }, RES1y, },
173
174 /* saw /, perhaps start of comment */
175 { COM1, { C_XXX }, S_CHRB, },
176 { COM1, { '*' }, COM2, },
177 #if PROTOMAIN
178 { COM1, { '/' }, COM5, },
179 #endif
180
181 /* saw / *, start of comment */
182 { COM2, { C_XXX }, COM2, },
183 { COM2, { '\n', C_EOF }, S_COMMENT, },
184 { COM2, { '/' }, COM4, },
185 { COM2, { '*' }, COM3, },
186 { COM2, { '#', ';', ')' }, QUAL(COM2), },
187
188 /* saw the * possibly ending a comment */
189 { COM3, { C_XXX }, COM2, },
190 { COM3, { '\n', C_EOF }, S_COMMENT, },
191 { COM3, { '#', ';', ')' }, QUAL(COM2), },
192 { COM3, { '*' }, COM3, },
193 { COM3, { '/' }, S_COMMENT, },
194
195 /* saw / in / * comment, possible malformed nest */
196 { COM4, { C_XXX }, COM2, },
197 { COM4, { '*', '\n', C_EOF }, S_COMMENT, },
198 { COM4, { '/' }, COM4, },
199
200 /* saw / /, start of comment */
201 { COM5, { C_XXX }, COM5, },
202 { COM5, { '\n', C_EOF }, S_COMMENT, },
203 { COM5, { '/' }, COM6, },
204 { COM5, { '*' }, COM7, },
205
206 /* saw / in / / comment, possible malformed nest */
207 { COM6, { C_XXX }, COM5, },
208 { COM6, { '*', '\n', C_EOF }, S_COMMENT, },
209 { COM6, { '/' }, COM6, },
210
211 /* saw * in / /, possible malformed nest */
212 { COM7, { C_XXX }, COM5, },
213 { COM7, { '\n', C_EOF }, S_COMMENT, },
214 { COM7, { '*' }, COM7, },
215 { COM7, { '/' }, S_COMMENT, },
216
217 /* normal identifier -- always a macro candidate */
218 { NID, { C_XXX }, S_MACRO, },
219 { NID, { C_LET, C_DEC }, NID, },
220
221 /* saw ., operator or dbl constant */
222 { DOT, { C_XXX }, S_CHRB, },
223 { DOT, { '.' }, DOT2, },
224 { DOT, { C_DEC }, BAD1, },
225
226 /* saw .., possible ... */
227 { DOT2, { C_XXX }, BACK(T_INVALID), },
228 { DOT2, { '.' }, KEEP(T_VARIADIC), },
229
230 /* saw L (possible start of normal wide literal) */
231 { LIT, { C_XXX }, S_MACRO, },
232 { LIT, { C_LET, C_DEC }, NID, },
233 { LIT, { '"', '\'' }, QUAL(LIT1), },
234
235 /* saw " or ' beginning literal */
236 { LIT1, { C_XXX }, LIT1, },
237 { LIT1, { '"', '\'' }, S_LITEND, },
238 { LIT1, { '\n', C_EOF }, S_LITEND, },
239 { LIT1, { '\\' }, LIT2, },
240
241 /* saw \ in literal */
242 { LIT2, { C_XXX }, S_LITESC, },
243 { LIT2, { '\n', C_EOF }, S_LITEND, },
244
245 /* eat malformed numeric constant */
246 { BAD1, { C_XXX }, BACK(T_INVALID), },
247 { BAD1, { C_LET, C_DEC, '.' }, BAD1, },
248 { BAD1, { 'e', 'E' }, BAD2, },
249
250 /* eat malformed numeric fraction|exponent */
251 { BAD2, { C_XXX }, BACK(T_INVALID), },
252 { BAD2, { C_LET, C_DEC, '.' }, BAD1, },
253 { BAD2, { '+', '-' }, BAD1, },
254
255 /* saw white space, eat it up */
256 { WS1, { C_XXX }, S_WS, },
257 { WS1, { ' ', '\t' }, WS1, },
258 { WS1, { '\f', '\v' }, S_VS, },
259
260 #if !PROTOMAIN
261
262 /* quick template */
263 { QUICK, { C_XXX }, QTOK, },
264 { QUICK, { C_EOF, MARK }, S_CHRB, },
265 { QUICK, { C_LET, C_DEC }, QID, },
266 { QUICK, { 'L' }, LIT0, },
267 { QUICK, { '"', '\'' }, S_LITBEG, },
268 { QUICK, { '/' }, S_CHRB, },
269 { QUICK, { '*' }, QCOM, },
270 { QUICK, { '#' }, SHARP1, },
271 { QUICK, { '\n' }, S_NL, },
272 { QUICK, { '\f', '\v' }, S_VS, },
273
274 /* copy QUICK to QUICK+1 through MAC0+1 */
275 { OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
276
277 /* quick start state */
278 { QUICK, { C_EOF }, S_EOF, },
279 { QUICK, { C_DEC }, QNUM, },
280 { QUICK, { MARK }, QTOK, },
281 { QUICK, { '/' }, COM1, },
282 { QUICK, { ' ', '\t' }, QUICK, },
283
284 /* grab non-macro tokens */
285 { QTOK, { C_DEC }, QNUM, },
286
287 /* grab numeric and invalid tokens */
288 { QNUM, { C_LET, C_DEC, '.' }, QNUM, },
289 { QNUM, { 'e', 'E' }, QEXP, },
290
291 /* grab exponent token */
292 { QEXP, { C_LET, C_DEC, '.' }, QNUM, },
293 { QEXP, { '+', '-' }, QNUM, },
294
295 /* saw *, grab possible bad comment terminator */
296 { QCOM, { C_DEC }, QNUM, },
297 { QCOM, { '/' }, S_COMMENT, },
298
299 /* saw L (possible start of wide string or first macro char) */
300 { MAC0, { 'L' }, QID, },
301 { MAC0, { '"', '\'' }, QUAL(LIT1), },
302
303 /* macro candidate template */
304 { MAC0+1, { 'L' }, QID, },
305
306 /* copy MAC0+1 to MAC0+2 through MACN */
307 { OP, {MAC0+1,MAC0+2,MACN}, COPY },
308
309 /* saw L (possible start of wide string or macro L) */
310 { HIT0, { C_XXX }, S_MACRO, },
311 { HIT0, { C_LET, C_DEC }, QID, },
312 { HIT0, { '"', '\'' }, QUAL(LIT1), },
313
314 /* macro hit template */
315 { HIT0+1, { C_XXX }, S_MACRO, },
316 { HIT0+1, { C_LET, C_DEC }, QID, },
317
318 /* copy HIT0+1 to HIT0+2 through HITN */
319 { OP, {HIT0+1,HIT0+2,HITN}, COPY },
320
321 /* saw L (possible start of wide literal) */
322 { LIT0, { C_XXX }, S_MACRO, },
323 { LIT0, { C_LET, C_DEC }, QID, },
324 { LIT0, { '"', '\'' }, QUAL(LIT1), },
325
326 /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
327 { COM1, { '=' }, KEEP(T_DIVEQ), },
328
329 /* normal start state */
330 { TOKEN, { C_XXX }, S_HUH, },
331 { TOKEN, { C_EOF }, S_EOF, },
332 { TOKEN, { C_DEC }, DEC1, },
333 { TOKEN, { '0' }, OCT1, },
334 { TOKEN, { '.' }, DOT1, },
335 { TOKEN, { C_LET }, NID, },
336 { TOKEN, { 'L' }, LIT, },
337 { TOKEN, { '"', '\'', '<' }, S_LITBEG, },
338 { TOKEN, { '/' }, COM1, },
339 { TOKEN, { '\n' }, S_NL, },
340 { TOKEN, { ' ', '\t' }, WS1, },
341 { TOKEN, { '\f', '\v' }, S_VS, },
342 { TOKEN, { '#' }, SHARP1, },
343 { TOKEN, { ':' }, COLON1, },
344 { TOKEN, { '%' }, PCT1, },
345 { TOKEN, { '&' }, AND1, },
346 { TOKEN, { '*' }, STAR1, },
347 { TOKEN, { '+' }, PLUS1, },
348 { TOKEN, { '-' }, MINUS1, },
349 { TOKEN, { '=' }, EQ1, },
350 { TOKEN, { '!' }, NOT1, },
351 { TOKEN, { '>' }, GT1, },
352 { TOKEN, { '^' }, CIRC1, },
353 { TOKEN, { '|' }, OR1, },
354 { TOKEN, { '(', ')', '[', ']' }, S_CHR, },
355 { TOKEN, { '{', '}', ',', ';' }, S_CHR, },
356 { TOKEN, { '~', '?' }, S_CHR, },
357
358 /* saw 0, possible oct|hex|dec|dbl constant */
359 { OCT1, { C_XXX }, BACK(T_DECIMAL), },
360 { OCT1, { C_LET, C_DEC }, BAD1, },
361 { OCT1, { C_OCT }, OCT2, },
362 { OCT1, { 'e', 'E' }, DBL2, },
363 { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
364 { OCT1, { 'x', 'X' }, HEX1, },
365 { OCT1, { '.' }, DBL1, },
366
367 /* saw 0<oct>, oct constant */
368 { OCT2, { C_XXX }, BACK(T_OCTAL), },
369 { OCT2, { C_LET, C_DEC }, BAD1, },
370 { OCT2, { C_OCT }, OCT2, },
371 { OCT2, { 'e', 'E' }, DBL2, },
372 { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
373 { OCT2, { '.' }, DBL1, },
374
375 /* oct constant qualifier */
376 { OCT3, { C_XXX }, BACK(T_OCTAL), },
377 { OCT3, { C_LET, C_DEC, '.' }, BAD1, },
378 { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
379
380 /* saw 0 [xX], hex constant */
381 { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
382 { HEX1, { C_LET }, BAD1, },
383 { HEX1, { C_HEX }, HEX1, },
384 { HEX1, { 'e', 'E' }, HEX3, },
385 { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
386 { HEX1, { '.' }, HEX4, },
387 { HEX1, { 'p', 'P' }, HEX5, },
388
389 /* hex constant qualifier */
390 { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
391 { HEX2, { C_LET, C_DEC, '.' }, BAD1, },
392 { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
393
394 /* hex [eE][-+] botch */
395 { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
396 { HEX3, { C_LET, '.', '-', '+'},BAD1, },
397 { HEX3, { C_HEX }, HEX1, },
398 { HEX3, { 'e', 'E' }, HEX3, },
399 { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
400
401 /* hex dbl fraction */
402 { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
403 { HEX4, { C_LET, '.' }, BAD1, },
404 { HEX4, { C_HEX }, HEX4, },
405 { HEX4, { 'p', 'P' }, HEX5, },
406 { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
407
408 /* optional hex dbl exponent sign */
409 { HEX5, { C_XXX }, BACK(T_INVALID), },
410 { HEX5, { C_LET, '.' }, BAD1, },
411 { HEX5, { '+', '-' }, HEX6, },
412 { HEX5, { C_DEC }, HEX7, },
413
414 /* mandatory hex dbl exponent first digit */
415 { HEX6, { C_XXX }, BACK(T_INVALID), },
416 { HEX6, { C_LET, '.' }, BAD1, },
417 { HEX6, { C_DEC }, HEX7, },
418
419 /* hex dbl exponent digits */
420 { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
421 { HEX7, { C_LET, '.' }, BAD1, },
422 { HEX7, { C_DEC }, HEX7, },
423 { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
424
425 /* hex dbl constant qualifier */
426 { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
427 { HEX8, { C_LET, '.' }, BAD1, },
428 { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
429
430 /* saw <dec>, dec constant */
431 { DEC1, { C_XXX }, BACK(T_DECIMAL), },
432 { DEC1, { C_LET }, BAD1, },
433 { DEC1, { C_DEC }, DEC1, },
434 { DEC1, { 'e', 'E' }, DBL2, },
435 { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
436 { DEC1, { '.' }, DBL1, },
437
438 /* dec constant qualifier */
439 { DEC2, { C_XXX }, BACK(T_DECIMAL), },
440 { DEC2, { C_LET, C_DEC }, BAD1, },
441 { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
442
443 /* saw ., operator or dbl constant */
444 { DOT1, { C_XXX }, S_CHRB, },
445 { DOT1, { '.' }, DOT2, },
446 { DOT1, { C_DEC }, DBL1, },
447
448 /* dbl fraction */
449 { DBL1, { C_XXX }, BACK(T_DOUBLE), },
450 { DBL1, { C_LET, '.' }, BAD1, },
451 { DBL1, { C_DEC }, DBL1, },
452 { DBL1, { 'e', 'E' }, DBL2, },
453 { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
454
455 /* optional dbl exponent sign */
456 { DBL2, { C_XXX }, BACK(T_INVALID), },
457 { DBL2, { C_LET, '.' }, BAD1, },
458 { DBL2, { '+', '-' }, DBL3, },
459 { DBL2, { C_DEC }, DBL4, },
460
461 /* mandatory dbl exponent first digit */
462 { DBL3, { C_XXX }, BACK(T_INVALID), },
463 { DBL3, { C_LET, '.' }, BAD1, },
464 { DBL3, { C_DEC }, DBL4, },
465
466 /* dbl exponent digits */
467 { DBL4, { C_XXX }, BACK(T_DOUBLE), },
468 { DBL4, { C_LET, '.' }, BAD1, },
469 { DBL4, { C_DEC }, DBL4, },
470 { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
471
472 /* dbl constant qualifier */
473 { DBL5, { C_XXX }, BACK(T_DOUBLE), },
474 { DBL5, { C_LET, '.' }, BAD1, },
475 { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
476
477 /* saw < starting include header */
478 { HDR1, { C_XXX }, HDR1, },
479 { HDR1, { '>', '\n', C_EOF }, S_LITEND, },
480
481 /* saw <binop><space> expecting = */
482 { BIN1, { C_XXX }, S_HUH, },
483 { BIN1, { ' ', '\t' }, BIN1, },
484
485 /* 2-char ops */
486
487 { SHARP1, { C_XXX }, S_SHARP, },
488
489 { PCT1, { C_XXX }, S_CHRB, },
490 { PCT1, { '=' }, KEEP(T_MODEQ), },
491
492 { AND1, { C_XXX }, S_CHRB, },
493 { AND1, { '=' }, KEEP(T_ANDEQ), },
494 { AND1, { '&' }, KEEP(T_ANDAND), },
495
496 { STAR1, { C_XXX }, S_CHRB, },
497 { STAR1, { '=' }, KEEP(T_MPYEQ), },
498 { STAR1, { '/' }, S_COMMENT, },
499
500 { PLUS1, { C_XXX }, S_CHRB, },
501 { PLUS1, { '=' }, KEEP(T_ADDEQ), },
502 { PLUS1, { '+' }, KEEP(T_ADDADD), },
503
504 { MINUS1, { C_XXX }, S_CHRB, },
505 { MINUS1, { '=' }, KEEP(T_SUBEQ), },
506 { MINUS1, { '-' }, KEEP(T_SUBSUB), },
507 { MINUS1, { '>' }, KEEP(T_PTRMEM), },
508
509 { COLON1, { C_XXX }, S_CHRB, },
510 { COLON1, { '=', '>' }, S_HUH, },
511
512 { LT1, { C_XXX }, S_CHRB, },
513 { LT1, { '=' }, KEEP(T_LE), },
514 { LT1, { '<' }, LSH1, },
515
516 { EQ1, { C_XXX }, S_CHRB, },
517 { EQ1, { '=' }, KEEP(T_EQ), },
518
519 { NOT1, { C_XXX }, S_CHRB, },
520 { NOT1, { '=' }, KEEP(T_NE), },
521
522 { GT1, { C_XXX }, S_CHRB, },
523 { GT1, { '=' }, KEEP(T_GE), },
524 { GT1, { '>' }, RSH1, },
525
526 { CIRC1, { C_XXX }, S_CHRB, },
527 { CIRC1, { '=' }, KEEP(T_XOREQ), },
528
529 { OR1, { C_XXX }, S_CHRB, },
530 { OR1, { '=' }, KEEP(T_OREQ), },
531 { OR1, { '|' }, KEEP(T_OROR), },
532
533 /* 3-char ops */
534
535 { ARROW1, { C_XXX }, BACK(T_PTRMEM), },
536 { ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
537
538 { LSH1, { C_XXX }, BACK(T_LSHIFT), },
539 { LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
540
541 { RSH1, { C_XXX }, BACK(T_RSHIFT), },
542 { RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
543
544 #endif
545
546 /* end */
547 { OP, { 0 }, END, }
548 };
549
550 short fsm[TERMINAL+1][MAX+1];
551
552 char trigraph[MAX+1];
553
554 #if PROTOMAIN
555 static char spl[] = { '\\', '\r', 0 };
556 static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
557 #else
558 static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
559 static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
560 #endif
561 static char* let = &aln[10];
562 static char hex[] = "fedcbaFEDCBA9876543210";
563 static char* dec = &hex[12];
564 static char* oct = &hex[14];
565
566 /*
567 * runtime FSM modifications
568 * ppfsm(FSM_INIT,0) must be called first
569 */
570
571 void
ppfsm(int op,register char * s)572 ppfsm(int op, register char* s)
573 {
574 register int c;
575 register int n;
576 register int i;
577 register short* rp;
578 register struct fsminit* fp;
579 #if !PROTOMAIN
580 char* t;
581 int x;
582 #endif
583
584 switch (op)
585 {
586
587 #if !PROTOMAIN
588
589 case FSM_IDADD:
590 while (c = *s++)
591 if (!ppisid(c))
592 {
593 if (fsm[TOKEN][c] == ~S_HUH)
594 {
595 setid(c);
596 for (i = 0; i < TERMINAL; i++)
597 fsm[i][c] = IDSTATE(fsm[i]['_']);
598 }
599 else error(2, "%c: cannot add to identifier set", c);
600 }
601 break;
602
603 case FSM_IDDEL:
604 while (c = *s++)
605 if (ppisid(c))
606 {
607 clrid(c);
608 for (i = 0; i < TERMINAL; i++)
609 fsm[i][c] = ~S_HUH;
610 }
611 break;
612
613 #endif
614
615 case FSM_INIT:
616 for (fp = fsminit;; fp++)
617 {
618 if ((n = fp->nextstate) >= TERMINAL) n = ~n;
619 if (fp->state == OP)
620 {
621 #if !PROTOMAIN
622 switch (n)
623 {
624 case COPY:
625 c = fp->ch[0];
626 n = fp->ch[2];
627 for (i = fp->ch[1]; i <= n; i++)
628 copy(i, c);
629 continue;
630 default:
631 break;
632 }
633 #endif
634 break;
635 }
636 rp = fsm[fp->state];
637 for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
638 {
639 switch (c)
640 {
641 case C_XXX:
642 for (c = 0; c <= MAX; c++)
643 rp[c] = n;
644 /*FALLTHROUGH*/
645
646 case C_EOF:
647 fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
648 continue;
649
650 case C_LET:
651 s = let;
652 break;
653
654 case C_HEX:
655 s = hex;
656 break;
657
658 case C_DEC:
659 s = dec;
660 break;
661
662 case C_OCT:
663 s = oct;
664 break;
665
666 default:
667 rp[c] = n;
668 continue;
669 }
670 while (c = *s++)
671 rp[c] = n;
672 }
673 }
674
675 /*
676 * install splice special cases
677 * and same non-terminal transitions
678 */
679
680 for (i = 0; i < TERMINAL; i++)
681 {
682 rp = fsm[i];
683 s = spl;
684 while (c = *s++)
685 if (c != MARK || !INCOMMENT(rp))
686 {
687 if (rp[c] >= 0) rp[c] = ~rp[c];
688 rp[c] &= ~SPLICE;
689 }
690 rp[EOB] = ~S_EOB;
691 for (c = 0; c <= MAX; c++)
692 if (rp[c] == i)
693 rp[c] = 0;
694 }
695 fsm[TERMINAL][0] = ~S_EOB;
696
697 #if !PROTOMAIN
698
699 /*
700 * default character types
701 */
702
703 s = let;
704 while (c = *s++)
705 setid(c);
706 s = dec;
707 while (c = *s++)
708 setdig(c);
709 s = spl;
710 do setsplice(c = *s++); while (c);
711
712 /*
713 * trigraph map
714 */
715
716 trigraph['='] = '#';
717 trigraph['('] = '[';
718 trigraph['/'] = '\\';
719 trigraph[')'] = ']';
720 trigraph['\''] = '^';
721 trigraph['<'] = '{';
722 trigraph['!'] = '|';
723 trigraph['>'] = '}';
724 trigraph['-'] = '~';
725 #endif
726 break;
727
728 #if !PROTOMAIN
729
730 case FSM_PLUSPLUS:
731 if (pp.option & PLUSPLUS)
732 {
733 fsm[COLON1][':'] = ~KEEP(T_SCOPE);
734 fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
735 fsm[MINUS1]['>'] = ARROW1;
736 fsm[COM1]['/'] = COM5;
737 t = "%<:";
738 for (i = 0; i < TERMINAL; i++)
739 {
740 rp = fsm[i];
741 if (!INCOMMENT(rp) && !INQUOTE(rp))
742 {
743 s = t;
744 while (c = *s++)
745 {
746 if (rp[c] > 0) rp[c] = ~rp[c];
747 else if (!rp[c]) rp[c] = ~i;
748 rp[c] &= ~SPLICE;
749 }
750 }
751 }
752 s = t;
753 while (c = *s++) setsplice(c);
754 }
755 else
756 {
757 fsm[COLON1][':'] = ~S_CHRB;
758 fsm[DOT1]['*'] = ~S_CHRB;
759 fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
760 fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
761 }
762 break;
763
764 #if COMPATIBLE
765
766 case FSM_COMPATIBILITY:
767 if (pp.state & COMPATIBILITY)
768 {
769 fsm[HEX1]['e'] = HEX1;
770 fsm[HEX1]['E'] = HEX1;
771 fsm[QNUM]['e'] = QNUM;
772 fsm[QNUM]['E'] = QNUM;
773 fsm[QNUM]['u'] = ~QUAL(QNUM);
774 fsm[QNUM]['U'] = ~QUAL(QNUM);
775 }
776 else
777 {
778 fsm[HEX1]['e'] = HEX3;
779 fsm[HEX1]['E'] = HEX3;
780 fsm[QNUM]['e'] = QEXP;
781 fsm[QNUM]['E'] = QEXP;
782 fsm[QNUM]['u'] = QNUM;
783 fsm[QNUM]['U'] = QNUM;
784 }
785 break;
786
787 #endif
788
789 case FSM_QUOTADD:
790 while (c = *s++)
791 if (fsm[TOKEN][c] == ~S_HUH)
792 for (i = 0; i < TERMINAL; i++)
793 fsm[i][c] = fsm[i]['"'];
794 else error(2, "%c: cannot add to quote set", c);
795 break;
796
797 case FSM_QUOTDEL:
798 while (c = *s++)
799 if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
800 for (i = 0; i < TERMINAL; i++)
801 fsm[i][c] = fsm[i]['_'];
802 break;
803
804 case FSM_OPSPACE:
805 n = s ? BIN1 : ~S_CHRB;
806 fsm[COM1][' '] = fsm[COM1]['\t'] = n;
807 fsm[AND1][' '] = fsm[AND1]['\t'] = n;
808 fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
809 fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
810 fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
811 fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
812 fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
813 fsm[OR1][' '] = fsm[OR1]['\t'] = n;
814 fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
815 fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
816 break;
817
818 case FSM_MACRO:
819 if (pp.truncate && strlen(s) >= pp.truncate)
820 {
821 x = s[pp.truncate];
822 s[pp.truncate] = 0;
823 }
824 else x = -1;
825 i = MAC0 + ((c = *s++) != 'L');
826 if ((n = fsm[QUICK][c]) != (i + NMAC))
827 {
828 n = i;
829 if (!*s) n += NMAC;
830 }
831 if (fsm[QUICK][c] != n)
832 fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
833 if (c = *s++)
834 {
835 for (;;)
836 {
837 if ((i = n) < HIT0)
838 {
839 if (n < MACN) n++;
840 if (!*s)
841 {
842 n += NMAC;
843 break;
844 }
845 if (fsm[i][c] < HIT0)
846 fsm[i][c] = n;
847 if (fsm[i + NMAC][c] < HIT0)
848 fsm[i + NMAC][c] = n;
849 }
850 else
851 {
852 if (n < HITN) n++;
853 if (!*s) break;
854 if (fsm[i][c] < HIT0)
855 {
856 n -= NMAC;
857 fsm[i][c] = n;
858 }
859 }
860 c = *s++;
861 }
862 if (x >= 0)
863 {
864 *s = x;
865 for (n = CHAR_MIN; n <= CHAR_MAX; n++)
866 if (ppisidig(n))
867 fsm[HITN][n] = HITN;
868 n = HITN;
869 }
870 if (fsm[i][c] < n)
871 fsm[i][c] = n;
872 if (i < HIT0 && fsm[i + NMAC][c] < n)
873 fsm[i + NMAC][c] = n;
874 }
875 break;
876
877 #endif
878
879 }
880 }
881
882 #if !PROTOMAIN
883
884 /*
885 * file buffer refill
886 * c is current input char
887 */
888
889 void
refill(register int c)890 refill(register int c)
891 {
892 if (pp.in->flags & IN_eof)
893 {
894 pp.in->nextchr--;
895 c = 0;
896 }
897 else
898 {
899 *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
900 c =
901 #if PROTOTYPE
902 (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
903 #endif
904 read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
905 }
906 if (c > 0)
907 {
908 if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
909 else pp.in->flags &= ~IN_newline;
910 #if PROTOTYPE
911 if (!(pp.in->flags & IN_prototype))
912 #endif
913 if (c < PPBUFSIZ && (pp.in->flags & IN_regular))
914 {
915 pp.in->flags |= IN_eof;
916 close(pp.in->fd);
917 pp.in->fd = -1;
918 }
919 }
920 else
921 {
922 if (c < 0)
923 {
924 error(ERROR_SYSTEM|3, "read error");
925 c = 0;
926 }
927 else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
928 {
929 static char ket[] = { 0, '}', '\n', 0 };
930
931 pp.in->flags ^= IN_c;
932 pp.in->nextchr = ket + 1;
933 c = 2;
934 }
935 pp.in->flags |= IN_eof;
936 }
937 #if CHECKPOINT
938 pp.in->buflen = c;
939 #endif
940 pp.in->nextchr[c] = 0;
941 debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
942 if (pp.test & 0x0080)
943 sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
944 }
945
946 #endif
947