1*906afcb8SAndy Fiddaman /*********************************************************************** 2*906afcb8SAndy Fiddaman * * 3*906afcb8SAndy Fiddaman * This software is part of the ast package * 4*906afcb8SAndy Fiddaman * Copyright (c) 1986-2011 AT&T Intellectual Property * 5*906afcb8SAndy Fiddaman * and is licensed under the * 6*906afcb8SAndy Fiddaman * Eclipse Public License, Version 1.0 * 7*906afcb8SAndy Fiddaman * by AT&T Intellectual Property * 8*906afcb8SAndy Fiddaman * * 9*906afcb8SAndy Fiddaman * A copy of the License is available at * 10*906afcb8SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html * 11*906afcb8SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12*906afcb8SAndy Fiddaman * * 13*906afcb8SAndy Fiddaman * Information and Software Systems Research * 14*906afcb8SAndy Fiddaman * AT&T Research * 15*906afcb8SAndy Fiddaman * Florham Park NJ * 16*906afcb8SAndy Fiddaman * * 17*906afcb8SAndy Fiddaman * Glenn Fowler <gsf@research.att.com> * 18*906afcb8SAndy Fiddaman * * 19*906afcb8SAndy Fiddaman ***********************************************************************/ 20*906afcb8SAndy Fiddaman #pragma prototyped 21*906afcb8SAndy Fiddaman /* 22*906afcb8SAndy Fiddaman * Glenn Fowler 23*906afcb8SAndy Fiddaman * AT&T Research 24*906afcb8SAndy Fiddaman * 25*906afcb8SAndy Fiddaman * preprocessor and proto lexical analyzer fsm 26*906afcb8SAndy Fiddaman * define PROTOMAIN for standalone proto 27*906afcb8SAndy Fiddaman */ 28*906afcb8SAndy Fiddaman 29*906afcb8SAndy Fiddaman #include "pplib.h" 30*906afcb8SAndy Fiddaman #include "ppfsm.h" 31*906afcb8SAndy Fiddaman 32*906afcb8SAndy Fiddaman /* 33*906afcb8SAndy Fiddaman * lexical FSM encoding 34*906afcb8SAndy Fiddaman * derived from a standalone ansi cpp by Dennis Ritchie 35*906afcb8SAndy Fiddaman * modified for libpp by Glenn Fowler 36*906afcb8SAndy Fiddaman * 37*906afcb8SAndy Fiddaman * fsm[] is initialized from fsminit[]. The encoding is blown out into 38*906afcb8SAndy Fiddaman * fsm[] for time efficiency. When in state state, and one of the 39*906afcb8SAndy Fiddaman * characters in ch arrives, enter nextstate. States >= TERMINAL are 40*906afcb8SAndy Fiddaman * either final, or at least require special action. In fsminit[] there 41*906afcb8SAndy Fiddaman * is a line for each <state,charset,nextstate>. Early entries are 42*906afcb8SAndy Fiddaman * overwritten by later ones. C_XXX is the universal set and should 43*906afcb8SAndy Fiddaman * always be first. Some of the fsminit[] entries are templates for 44*906afcb8SAndy Fiddaman * groups of states. The OP entries trigger the state copies. States 45*906afcb8SAndy Fiddaman * above TERMINAL are represented in fsm[] as negative values. S_TOK and 46*906afcb8SAndy Fiddaman * S_TOKB encode the resulting token type in the upper bits. These actions 47*906afcb8SAndy Fiddaman * differ in that S_TOKB has a lookahead char. 48*906afcb8SAndy Fiddaman * 49*906afcb8SAndy Fiddaman * fsm[] has three start states: 50*906afcb8SAndy Fiddaman * 51*906afcb8SAndy Fiddaman * PROTO proto (ANSI -> K&R,C++,ANSI) 52*906afcb8SAndy Fiddaman * QUICK standalone ppcpp() 53*906afcb8SAndy Fiddaman * TOKEN tokenizing pplex() 54*906afcb8SAndy Fiddaman * 55*906afcb8SAndy Fiddaman * If the next state remains the same then the fsm[] transition value is 0. 56*906afcb8SAndy Fiddaman * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses 57*906afcb8SAndy Fiddaman * fsm[state+1][0] which is ~S_EOB for all states. This preserves the 58*906afcb8SAndy Fiddaman * power of 2 fsm[] row size for efficient array indexing. Thanks to 59*906afcb8SAndy Fiddaman * D. G. Korn for the last two observations. The pseudo non-terminal state 60*906afcb8SAndy Fiddaman * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF. 61*906afcb8SAndy Fiddaman * 62*906afcb8SAndy Fiddaman * The bit layout is: 63*906afcb8SAndy Fiddaman * 64*906afcb8SAndy Fiddaman * TERM arg SPLICE next 65*906afcb8SAndy Fiddaman * 15 14-8 7 6-0 66*906afcb8SAndy Fiddaman */ 67*906afcb8SAndy Fiddaman 68*906afcb8SAndy Fiddaman /* 69*906afcb8SAndy Fiddaman * NOTE: these must be `control' characters for all native codesets 70*906afcb8SAndy Fiddaman * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3} 71*906afcb8SAndy Fiddaman */ 72*906afcb8SAndy Fiddaman 73*906afcb8SAndy Fiddaman #define C_DEC 001 74*906afcb8SAndy Fiddaman #define C_EOF 002 75*906afcb8SAndy Fiddaman #define C_HEX 003 76*906afcb8SAndy Fiddaman #define C_LET 021 77*906afcb8SAndy Fiddaman #define C_OCT 022 78*906afcb8SAndy Fiddaman #define C_XXX 023 79*906afcb8SAndy Fiddaman 80*906afcb8SAndy Fiddaman #define OP (-1) 81*906afcb8SAndy Fiddaman #define END 0 82*906afcb8SAndy Fiddaman #define COPY 1 83*906afcb8SAndy Fiddaman 84*906afcb8SAndy Fiddaman #define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1]) 85*906afcb8SAndy Fiddaman 86*906afcb8SAndy Fiddaman struct fsminit /* fsm initialization row */ 87*906afcb8SAndy Fiddaman { 88*906afcb8SAndy Fiddaman int state; /* if in this state */ 89*906afcb8SAndy Fiddaman unsigned char ch[4]; /* and see one of these */ 90*906afcb8SAndy Fiddaman int nextstate; /* enter this state if <TERMINAL*/ 91*906afcb8SAndy Fiddaman }; 92*906afcb8SAndy Fiddaman 93*906afcb8SAndy Fiddaman static struct fsminit fsminit[] = 94*906afcb8SAndy Fiddaman { 95*906afcb8SAndy Fiddaman /* proto start state */ 96*906afcb8SAndy Fiddaman { PROTO, { C_XXX }, S_CHR, }, 97*906afcb8SAndy Fiddaman { PROTO, { C_EOF }, S_EOF, }, 98*906afcb8SAndy Fiddaman { PROTO, { C_DEC }, BAD1, }, 99*906afcb8SAndy Fiddaman { PROTO, { '.' }, DOT, }, 100*906afcb8SAndy Fiddaman { PROTO, { C_LET }, NID, }, 101*906afcb8SAndy Fiddaman { PROTO, { 'L' }, LIT, }, 102*906afcb8SAndy Fiddaman { PROTO, { 'd', 'e', 'f', 'i' }, RES1, }, 103*906afcb8SAndy Fiddaman { PROTO, { 'r', 's', 't', 'v' }, RES1, }, 104*906afcb8SAndy Fiddaman { PROTO, { 'w', 'N' }, RES1, }, 105*906afcb8SAndy Fiddaman { PROTO, { '"', '\'' }, S_LITBEG, }, 106*906afcb8SAndy Fiddaman { PROTO, { '/' }, COM1, }, 107*906afcb8SAndy Fiddaman { PROTO, { '\n' }, S_NL, }, 108*906afcb8SAndy Fiddaman { PROTO, { ' ','\t','\f','\v' }, WS1, }, 109*906afcb8SAndy Fiddaman 110*906afcb8SAndy Fiddaman /* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */ 111*906afcb8SAndy Fiddaman { RES1, { C_XXX }, S_MACRO, }, 112*906afcb8SAndy Fiddaman { RES1, { C_LET, C_DEC }, NID, }, 113*906afcb8SAndy Fiddaman { RES1, { 'a' }, RES1a, }, 114*906afcb8SAndy Fiddaman { RES1, { 'e' }, RES1e, }, 115*906afcb8SAndy Fiddaman { RES1, { 'f' }, RES1f, }, 116*906afcb8SAndy Fiddaman { RES1, { 'h' }, RES1h, }, 117*906afcb8SAndy Fiddaman { RES1, { 'l' }, RES1l, }, 118*906afcb8SAndy Fiddaman { RES1, { 'n' }, RES1n, }, 119*906afcb8SAndy Fiddaman { RES1, { 'o' }, RES1o, }, 120*906afcb8SAndy Fiddaman { RES1, { 't' }, RES1t, }, 121*906afcb8SAndy Fiddaman { RES1, { 'x' }, RES1x, }, 122*906afcb8SAndy Fiddaman { RES1, { 'y' }, RES1y, }, 123*906afcb8SAndy Fiddaman 124*906afcb8SAndy Fiddaman /* proto reserved {va_start} */ 125*906afcb8SAndy Fiddaman { RES1a, { C_XXX }, S_RESERVED, }, 126*906afcb8SAndy Fiddaman { RES1a, { C_LET, C_DEC }, NID, }, 127*906afcb8SAndy Fiddaman { RES1a, { '_','s','t','a' }, RES1a, }, 128*906afcb8SAndy Fiddaman { RES1a, { 'r' }, RES1a, }, 129*906afcb8SAndy Fiddaman 130*906afcb8SAndy Fiddaman /* proto reserved {return} */ 131*906afcb8SAndy Fiddaman { RES1e, { C_XXX }, S_RESERVED, }, 132*906afcb8SAndy Fiddaman { RES1e, { C_LET, C_DEC }, NID, }, 133*906afcb8SAndy Fiddaman { RES1e, { 't','u','r','n' }, RES1e, }, 134*906afcb8SAndy Fiddaman 135*906afcb8SAndy Fiddaman /* proto reserved {if} */ 136*906afcb8SAndy Fiddaman { RES1f, { C_XXX }, S_RESERVED, }, 137*906afcb8SAndy Fiddaman { RES1f, { C_LET, C_DEC }, NID, }, 138*906afcb8SAndy Fiddaman 139*906afcb8SAndy Fiddaman /* proto reserved {while} */ 140*906afcb8SAndy Fiddaman { RES1h, { C_XXX }, S_RESERVED, }, 141*906afcb8SAndy Fiddaman { RES1h, { C_LET, C_DEC }, NID, }, 142*906afcb8SAndy Fiddaman { RES1h, { 'i','l','e' }, RES1h, }, 143*906afcb8SAndy Fiddaman 144*906afcb8SAndy Fiddaman /* proto reserved {else} */ 145*906afcb8SAndy Fiddaman { RES1l, { C_XXX }, S_RESERVED, }, 146*906afcb8SAndy Fiddaman { RES1l, { C_LET, C_DEC }, NID, }, 147*906afcb8SAndy Fiddaman { RES1l, { 's','e' }, RES1l, }, 148*906afcb8SAndy Fiddaman 149*906afcb8SAndy Fiddaman /* proto reserved {inline} */ 150*906afcb8SAndy Fiddaman { RES1n, { C_XXX }, S_RESERVED, }, 151*906afcb8SAndy Fiddaman { RES1n, { C_LET, C_DEC }, NID, }, 152*906afcb8SAndy Fiddaman { RES1n, { 'l','i','n','e' }, RES1n, }, 153*906afcb8SAndy Fiddaman 154*906afcb8SAndy Fiddaman /* proto reserved {do,for,void} */ 155*906afcb8SAndy Fiddaman { RES1o, { C_XXX }, S_RESERVED, }, 156*906afcb8SAndy Fiddaman { RES1o, { C_LET, C_DEC }, NID, }, 157*906afcb8SAndy Fiddaman { RES1o, { 'r','i','d','N' }, RES1o, }, 158*906afcb8SAndy Fiddaman 159*906afcb8SAndy Fiddaman /* proto reserved {static} */ 160*906afcb8SAndy Fiddaman { RES1t, { C_XXX }, S_RESERVED, }, 161*906afcb8SAndy Fiddaman { RES1t, { C_LET, C_DEC }, NID, }, 162*906afcb8SAndy Fiddaman { RES1t, { 'a','t','i','c' }, RES1t, }, 163*906afcb8SAndy Fiddaman 164*906afcb8SAndy Fiddaman /* proto reserved {extern} */ 165*906afcb8SAndy Fiddaman { RES1x, { C_XXX }, S_RESERVED, }, 166*906afcb8SAndy Fiddaman { RES1x, { C_LET, C_DEC }, NID, }, 167*906afcb8SAndy Fiddaman { RES1x, { 't','e','r','n' }, RES1x, }, 168*906afcb8SAndy Fiddaman 169*906afcb8SAndy Fiddaman /* proto reserved {typedef} */ 170*906afcb8SAndy Fiddaman { RES1y, { C_XXX }, S_RESERVED, }, 171*906afcb8SAndy Fiddaman { RES1y, { C_LET, C_DEC }, NID, }, 172*906afcb8SAndy Fiddaman { RES1y, { 'p','e','d','f' }, RES1y, }, 173*906afcb8SAndy Fiddaman 174*906afcb8SAndy Fiddaman /* saw /, perhaps start of comment */ 175*906afcb8SAndy Fiddaman { COM1, { C_XXX }, S_CHRB, }, 176*906afcb8SAndy Fiddaman { COM1, { '*' }, COM2, }, 177*906afcb8SAndy Fiddaman #if PROTOMAIN 178*906afcb8SAndy Fiddaman { COM1, { '/' }, COM5, }, 179*906afcb8SAndy Fiddaman #endif 180*906afcb8SAndy Fiddaman 181*906afcb8SAndy Fiddaman /* saw / *, start of comment */ 182*906afcb8SAndy Fiddaman { COM2, { C_XXX }, COM2, }, 183*906afcb8SAndy Fiddaman { COM2, { '\n', C_EOF }, S_COMMENT, }, 184*906afcb8SAndy Fiddaman { COM2, { '/' }, COM4, }, 185*906afcb8SAndy Fiddaman { COM2, { '*' }, COM3, }, 186*906afcb8SAndy Fiddaman { COM2, { '#', ';', ')' }, QUAL(COM2), }, 187*906afcb8SAndy Fiddaman 188*906afcb8SAndy Fiddaman /* saw the * possibly ending a comment */ 189*906afcb8SAndy Fiddaman { COM3, { C_XXX }, COM2, }, 190*906afcb8SAndy Fiddaman { COM3, { '\n', C_EOF }, S_COMMENT, }, 191*906afcb8SAndy Fiddaman { COM3, { '#', ';', ')' }, QUAL(COM2), }, 192*906afcb8SAndy Fiddaman { COM3, { '*' }, COM3, }, 193*906afcb8SAndy Fiddaman { COM3, { '/' }, S_COMMENT, }, 194*906afcb8SAndy Fiddaman 195*906afcb8SAndy Fiddaman /* saw / in / * comment, possible malformed nest */ 196*906afcb8SAndy Fiddaman { COM4, { C_XXX }, COM2, }, 197*906afcb8SAndy Fiddaman { COM4, { '*', '\n', C_EOF }, S_COMMENT, }, 198*906afcb8SAndy Fiddaman { COM4, { '/' }, COM4, }, 199*906afcb8SAndy Fiddaman 200*906afcb8SAndy Fiddaman /* saw / /, start of comment */ 201*906afcb8SAndy Fiddaman { COM5, { C_XXX }, COM5, }, 202*906afcb8SAndy Fiddaman { COM5, { '\n', C_EOF }, S_COMMENT, }, 203*906afcb8SAndy Fiddaman { COM5, { '/' }, COM6, }, 204*906afcb8SAndy Fiddaman { COM5, { '*' }, COM7, }, 205*906afcb8SAndy Fiddaman 206*906afcb8SAndy Fiddaman /* saw / in / / comment, possible malformed nest */ 207*906afcb8SAndy Fiddaman { COM6, { C_XXX }, COM5, }, 208*906afcb8SAndy Fiddaman { COM6, { '*', '\n', C_EOF }, S_COMMENT, }, 209*906afcb8SAndy Fiddaman { COM6, { '/' }, COM6, }, 210*906afcb8SAndy Fiddaman 211*906afcb8SAndy Fiddaman /* saw * in / /, possible malformed nest */ 212*906afcb8SAndy Fiddaman { COM7, { C_XXX }, COM5, }, 213*906afcb8SAndy Fiddaman { COM7, { '\n', C_EOF }, S_COMMENT, }, 214*906afcb8SAndy Fiddaman { COM7, { '*' }, COM7, }, 215*906afcb8SAndy Fiddaman { COM7, { '/' }, S_COMMENT, }, 216*906afcb8SAndy Fiddaman 217*906afcb8SAndy Fiddaman /* normal identifier -- always a macro candidate */ 218*906afcb8SAndy Fiddaman { NID, { C_XXX }, S_MACRO, }, 219*906afcb8SAndy Fiddaman { NID, { C_LET, C_DEC }, NID, }, 220*906afcb8SAndy Fiddaman 221*906afcb8SAndy Fiddaman /* saw ., operator or dbl constant */ 222*906afcb8SAndy Fiddaman { DOT, { C_XXX }, S_CHRB, }, 223*906afcb8SAndy Fiddaman { DOT, { '.' }, DOT2, }, 224*906afcb8SAndy Fiddaman { DOT, { C_DEC }, BAD1, }, 225*906afcb8SAndy Fiddaman 226*906afcb8SAndy Fiddaman /* saw .., possible ... */ 227*906afcb8SAndy Fiddaman { DOT2, { C_XXX }, BACK(T_INVALID), }, 228*906afcb8SAndy Fiddaman { DOT2, { '.' }, KEEP(T_VARIADIC), }, 229*906afcb8SAndy Fiddaman 230*906afcb8SAndy Fiddaman /* saw L (possible start of normal wide literal) */ 231*906afcb8SAndy Fiddaman { LIT, { C_XXX }, S_MACRO, }, 232*906afcb8SAndy Fiddaman { LIT, { C_LET, C_DEC }, NID, }, 233*906afcb8SAndy Fiddaman { LIT, { '"', '\'' }, QUAL(LIT1), }, 234*906afcb8SAndy Fiddaman 235*906afcb8SAndy Fiddaman /* saw " or ' beginning literal */ 236*906afcb8SAndy Fiddaman { LIT1, { C_XXX }, LIT1, }, 237*906afcb8SAndy Fiddaman { LIT1, { '"', '\'' }, S_LITEND, }, 238*906afcb8SAndy Fiddaman { LIT1, { '\n', C_EOF }, S_LITEND, }, 239*906afcb8SAndy Fiddaman { LIT1, { '\\' }, LIT2, }, 240*906afcb8SAndy Fiddaman 241*906afcb8SAndy Fiddaman /* saw \ in literal */ 242*906afcb8SAndy Fiddaman { LIT2, { C_XXX }, S_LITESC, }, 243*906afcb8SAndy Fiddaman { LIT2, { '\n', C_EOF }, S_LITEND, }, 244*906afcb8SAndy Fiddaman 245*906afcb8SAndy Fiddaman /* eat malformed numeric constant */ 246*906afcb8SAndy Fiddaman { BAD1, { C_XXX }, BACK(T_INVALID), }, 247*906afcb8SAndy Fiddaman { BAD1, { C_LET, C_DEC, '.' }, BAD1, }, 248*906afcb8SAndy Fiddaman { BAD1, { 'e', 'E' }, BAD2, }, 249*906afcb8SAndy Fiddaman 250*906afcb8SAndy Fiddaman /* eat malformed numeric fraction|exponent */ 251*906afcb8SAndy Fiddaman { BAD2, { C_XXX }, BACK(T_INVALID), }, 252*906afcb8SAndy Fiddaman { BAD2, { C_LET, C_DEC, '.' }, BAD1, }, 253*906afcb8SAndy Fiddaman { BAD2, { '+', '-' }, BAD1, }, 254*906afcb8SAndy Fiddaman 255*906afcb8SAndy Fiddaman /* saw white space, eat it up */ 256*906afcb8SAndy Fiddaman { WS1, { C_XXX }, S_WS, }, 257*906afcb8SAndy Fiddaman { WS1, { ' ', '\t' }, WS1, }, 258*906afcb8SAndy Fiddaman { WS1, { '\f', '\v' }, S_VS, }, 259*906afcb8SAndy Fiddaman 260*906afcb8SAndy Fiddaman #if !PROTOMAIN 261*906afcb8SAndy Fiddaman 262*906afcb8SAndy Fiddaman /* quick template */ 263*906afcb8SAndy Fiddaman { QUICK, { C_XXX }, QTOK, }, 264*906afcb8SAndy Fiddaman { QUICK, { C_EOF, MARK }, S_CHRB, }, 265*906afcb8SAndy Fiddaman { QUICK, { C_LET, C_DEC }, QID, }, 266*906afcb8SAndy Fiddaman { QUICK, { 'L' }, LIT0, }, 267*906afcb8SAndy Fiddaman { QUICK, { '"', '\'' }, S_LITBEG, }, 268*906afcb8SAndy Fiddaman { QUICK, { '/' }, S_CHRB, }, 269*906afcb8SAndy Fiddaman { QUICK, { '*' }, QCOM, }, 270*906afcb8SAndy Fiddaman { QUICK, { '#' }, SHARP1, }, 271*906afcb8SAndy Fiddaman { QUICK, { '\n' }, S_NL, }, 272*906afcb8SAndy Fiddaman { QUICK, { '\f', '\v' }, S_VS, }, 273*906afcb8SAndy Fiddaman 274*906afcb8SAndy Fiddaman /* copy QUICK to QUICK+1 through MAC0+1 */ 275*906afcb8SAndy Fiddaman { OP, {QUICK,QUICK+1,MAC0+1}, COPY, }, 276*906afcb8SAndy Fiddaman 277*906afcb8SAndy Fiddaman /* quick start state */ 278*906afcb8SAndy Fiddaman { QUICK, { C_EOF }, S_EOF, }, 279*906afcb8SAndy Fiddaman { QUICK, { C_DEC }, QNUM, }, 280*906afcb8SAndy Fiddaman { QUICK, { MARK }, QTOK, }, 281*906afcb8SAndy Fiddaman { QUICK, { '/' }, COM1, }, 282*906afcb8SAndy Fiddaman { QUICK, { ' ', '\t' }, QUICK, }, 283*906afcb8SAndy Fiddaman 284*906afcb8SAndy Fiddaman /* grab non-macro tokens */ 285*906afcb8SAndy Fiddaman { QTOK, { C_DEC }, QNUM, }, 286*906afcb8SAndy Fiddaman 287*906afcb8SAndy Fiddaman /* grab numeric and invalid tokens */ 288*906afcb8SAndy Fiddaman { QNUM, { C_LET, C_DEC, '.' }, QNUM, }, 289*906afcb8SAndy Fiddaman { QNUM, { 'e', 'E' }, QEXP, }, 290*906afcb8SAndy Fiddaman 291*906afcb8SAndy Fiddaman /* grab exponent token */ 292*906afcb8SAndy Fiddaman { QEXP, { C_LET, C_DEC, '.' }, QNUM, }, 293*906afcb8SAndy Fiddaman { QEXP, { '+', '-' }, QNUM, }, 294*906afcb8SAndy Fiddaman 295*906afcb8SAndy Fiddaman /* saw *, grab possible bad comment terminator */ 296*906afcb8SAndy Fiddaman { QCOM, { C_DEC }, QNUM, }, 297*906afcb8SAndy Fiddaman { QCOM, { '/' }, S_COMMENT, }, 298*906afcb8SAndy Fiddaman 299*906afcb8SAndy Fiddaman /* saw L (possible start of wide string or first macro char) */ 300*906afcb8SAndy Fiddaman { MAC0, { 'L' }, QID, }, 301*906afcb8SAndy Fiddaman { MAC0, { '"', '\'' }, QUAL(LIT1), }, 302*906afcb8SAndy Fiddaman 303*906afcb8SAndy Fiddaman /* macro candidate template */ 304*906afcb8SAndy Fiddaman { MAC0+1, { 'L' }, QID, }, 305*906afcb8SAndy Fiddaman 306*906afcb8SAndy Fiddaman /* copy MAC0+1 to MAC0+2 through MACN */ 307*906afcb8SAndy Fiddaman { OP, {MAC0+1,MAC0+2,MACN}, COPY }, 308*906afcb8SAndy Fiddaman 309*906afcb8SAndy Fiddaman /* saw L (possible start of wide string or macro L) */ 310*906afcb8SAndy Fiddaman { HIT0, { C_XXX }, S_MACRO, }, 311*906afcb8SAndy Fiddaman { HIT0, { C_LET, C_DEC }, QID, }, 312*906afcb8SAndy Fiddaman { HIT0, { '"', '\'' }, QUAL(LIT1), }, 313*906afcb8SAndy Fiddaman 314*906afcb8SAndy Fiddaman /* macro hit template */ 315*906afcb8SAndy Fiddaman { HIT0+1, { C_XXX }, S_MACRO, }, 316*906afcb8SAndy Fiddaman { HIT0+1, { C_LET, C_DEC }, QID, }, 317*906afcb8SAndy Fiddaman 318*906afcb8SAndy Fiddaman /* copy HIT0+1 to HIT0+2 through HITN */ 319*906afcb8SAndy Fiddaman { OP, {HIT0+1,HIT0+2,HITN}, COPY }, 320*906afcb8SAndy Fiddaman 321*906afcb8SAndy Fiddaman /* saw L (possible start of wide literal) */ 322*906afcb8SAndy Fiddaman { LIT0, { C_XXX }, S_MACRO, }, 323*906afcb8SAndy Fiddaman { LIT0, { C_LET, C_DEC }, QID, }, 324*906afcb8SAndy Fiddaman { LIT0, { '"', '\'' }, QUAL(LIT1), }, 325*906afcb8SAndy Fiddaman 326*906afcb8SAndy Fiddaman /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */ 327*906afcb8SAndy Fiddaman { COM1, { '=' }, KEEP(T_DIVEQ), }, 328*906afcb8SAndy Fiddaman 329*906afcb8SAndy Fiddaman /* normal start state */ 330*906afcb8SAndy Fiddaman { TOKEN, { C_XXX }, S_HUH, }, 331*906afcb8SAndy Fiddaman { TOKEN, { C_EOF }, S_EOF, }, 332*906afcb8SAndy Fiddaman { TOKEN, { C_DEC }, DEC1, }, 333*906afcb8SAndy Fiddaman { TOKEN, { '0' }, OCT1, }, 334*906afcb8SAndy Fiddaman { TOKEN, { '.' }, DOT1, }, 335*906afcb8SAndy Fiddaman { TOKEN, { C_LET }, NID, }, 336*906afcb8SAndy Fiddaman { TOKEN, { 'L' }, LIT, }, 337*906afcb8SAndy Fiddaman { TOKEN, { '"', '\'', '<' }, S_LITBEG, }, 338*906afcb8SAndy Fiddaman { TOKEN, { '/' }, COM1, }, 339*906afcb8SAndy Fiddaman { TOKEN, { '\n' }, S_NL, }, 340*906afcb8SAndy Fiddaman { TOKEN, { ' ', '\t' }, WS1, }, 341*906afcb8SAndy Fiddaman { TOKEN, { '\f', '\v' }, S_VS, }, 342*906afcb8SAndy Fiddaman { TOKEN, { '#' }, SHARP1, }, 343*906afcb8SAndy Fiddaman { TOKEN, { ':' }, COLON1, }, 344*906afcb8SAndy Fiddaman { TOKEN, { '%' }, PCT1, }, 345*906afcb8SAndy Fiddaman { TOKEN, { '&' }, AND1, }, 346*906afcb8SAndy Fiddaman { TOKEN, { '*' }, STAR1, }, 347*906afcb8SAndy Fiddaman { TOKEN, { '+' }, PLUS1, }, 348*906afcb8SAndy Fiddaman { TOKEN, { '-' }, MINUS1, }, 349*906afcb8SAndy Fiddaman { TOKEN, { '=' }, EQ1, }, 350*906afcb8SAndy Fiddaman { TOKEN, { '!' }, NOT1, }, 351*906afcb8SAndy Fiddaman { TOKEN, { '>' }, GT1, }, 352*906afcb8SAndy Fiddaman { TOKEN, { '^' }, CIRC1, }, 353*906afcb8SAndy Fiddaman { TOKEN, { '|' }, OR1, }, 354*906afcb8SAndy Fiddaman { TOKEN, { '(', ')', '[', ']' }, S_CHR, }, 355*906afcb8SAndy Fiddaman { TOKEN, { '{', '}', ',', ';' }, S_CHR, }, 356*906afcb8SAndy Fiddaman { TOKEN, { '~', '?' }, S_CHR, }, 357*906afcb8SAndy Fiddaman 358*906afcb8SAndy Fiddaman /* saw 0, possible oct|hex|dec|dbl constant */ 359*906afcb8SAndy Fiddaman { OCT1, { C_XXX }, BACK(T_DECIMAL), }, 360*906afcb8SAndy Fiddaman { OCT1, { C_LET, C_DEC }, BAD1, }, 361*906afcb8SAndy Fiddaman { OCT1, { C_OCT }, OCT2, }, 362*906afcb8SAndy Fiddaman { OCT1, { 'e', 'E' }, DBL2, }, 363*906afcb8SAndy Fiddaman { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 364*906afcb8SAndy Fiddaman { OCT1, { 'x', 'X' }, HEX1, }, 365*906afcb8SAndy Fiddaman { OCT1, { '.' }, DBL1, }, 366*906afcb8SAndy Fiddaman 367*906afcb8SAndy Fiddaman /* saw 0<oct>, oct constant */ 368*906afcb8SAndy Fiddaman { OCT2, { C_XXX }, BACK(T_OCTAL), }, 369*906afcb8SAndy Fiddaman { OCT2, { C_LET, C_DEC }, BAD1, }, 370*906afcb8SAndy Fiddaman { OCT2, { C_OCT }, OCT2, }, 371*906afcb8SAndy Fiddaman { OCT2, { 'e', 'E' }, DBL2, }, 372*906afcb8SAndy Fiddaman { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), }, 373*906afcb8SAndy Fiddaman { OCT2, { '.' }, DBL1, }, 374*906afcb8SAndy Fiddaman 375*906afcb8SAndy Fiddaman /* oct constant qualifier */ 376*906afcb8SAndy Fiddaman { OCT3, { C_XXX }, BACK(T_OCTAL), }, 377*906afcb8SAndy Fiddaman { OCT3, { C_LET, C_DEC, '.' }, BAD1, }, 378*906afcb8SAndy Fiddaman { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), }, 379*906afcb8SAndy Fiddaman 380*906afcb8SAndy Fiddaman /* saw 0 [xX], hex constant */ 381*906afcb8SAndy Fiddaman { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), }, 382*906afcb8SAndy Fiddaman { HEX1, { C_LET }, BAD1, }, 383*906afcb8SAndy Fiddaman { HEX1, { C_HEX }, HEX1, }, 384*906afcb8SAndy Fiddaman { HEX1, { 'e', 'E' }, HEX3, }, 385*906afcb8SAndy Fiddaman { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 386*906afcb8SAndy Fiddaman { HEX1, { '.' }, HEX4, }, 387*906afcb8SAndy Fiddaman { HEX1, { 'p', 'P' }, HEX5, }, 388*906afcb8SAndy Fiddaman 389*906afcb8SAndy Fiddaman /* hex constant qualifier */ 390*906afcb8SAndy Fiddaman { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), }, 391*906afcb8SAndy Fiddaman { HEX2, { C_LET, C_DEC, '.' }, BAD1, }, 392*906afcb8SAndy Fiddaman { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 393*906afcb8SAndy Fiddaman 394*906afcb8SAndy Fiddaman /* hex [eE][-+] botch */ 395*906afcb8SAndy Fiddaman { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), }, 396*906afcb8SAndy Fiddaman { HEX3, { C_LET, '.', '-', '+'},BAD1, }, 397*906afcb8SAndy Fiddaman { HEX3, { C_HEX }, HEX1, }, 398*906afcb8SAndy Fiddaman { HEX3, { 'e', 'E' }, HEX3, }, 399*906afcb8SAndy Fiddaman { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), }, 400*906afcb8SAndy Fiddaman 401*906afcb8SAndy Fiddaman /* hex dbl fraction */ 402*906afcb8SAndy Fiddaman { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), }, 403*906afcb8SAndy Fiddaman { HEX4, { C_LET, '.' }, BAD1, }, 404*906afcb8SAndy Fiddaman { HEX4, { C_HEX }, HEX4, }, 405*906afcb8SAndy Fiddaman { HEX4, { 'p', 'P' }, HEX5, }, 406*906afcb8SAndy Fiddaman { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 407*906afcb8SAndy Fiddaman 408*906afcb8SAndy Fiddaman /* optional hex dbl exponent sign */ 409*906afcb8SAndy Fiddaman { HEX5, { C_XXX }, BACK(T_INVALID), }, 410*906afcb8SAndy Fiddaman { HEX5, { C_LET, '.' }, BAD1, }, 411*906afcb8SAndy Fiddaman { HEX5, { '+', '-' }, HEX6, }, 412*906afcb8SAndy Fiddaman { HEX5, { C_DEC }, HEX7, }, 413*906afcb8SAndy Fiddaman 414*906afcb8SAndy Fiddaman /* mandatory hex dbl exponent first digit */ 415*906afcb8SAndy Fiddaman { HEX6, { C_XXX }, BACK(T_INVALID), }, 416*906afcb8SAndy Fiddaman { HEX6, { C_LET, '.' }, BAD1, }, 417*906afcb8SAndy Fiddaman { HEX6, { C_DEC }, HEX7, }, 418*906afcb8SAndy Fiddaman 419*906afcb8SAndy Fiddaman /* hex dbl exponent digits */ 420*906afcb8SAndy Fiddaman { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), }, 421*906afcb8SAndy Fiddaman { HEX7, { C_LET, '.' }, BAD1, }, 422*906afcb8SAndy Fiddaman { HEX7, { C_DEC }, HEX7, }, 423*906afcb8SAndy Fiddaman { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 424*906afcb8SAndy Fiddaman 425*906afcb8SAndy Fiddaman /* hex dbl constant qualifier */ 426*906afcb8SAndy Fiddaman { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), }, 427*906afcb8SAndy Fiddaman { HEX8, { C_LET, '.' }, BAD1, }, 428*906afcb8SAndy Fiddaman { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), }, 429*906afcb8SAndy Fiddaman 430*906afcb8SAndy Fiddaman /* saw <dec>, dec constant */ 431*906afcb8SAndy Fiddaman { DEC1, { C_XXX }, BACK(T_DECIMAL), }, 432*906afcb8SAndy Fiddaman { DEC1, { C_LET }, BAD1, }, 433*906afcb8SAndy Fiddaman { DEC1, { C_DEC }, DEC1, }, 434*906afcb8SAndy Fiddaman { DEC1, { 'e', 'E' }, DBL2, }, 435*906afcb8SAndy Fiddaman { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 436*906afcb8SAndy Fiddaman { DEC1, { '.' }, DBL1, }, 437*906afcb8SAndy Fiddaman 438*906afcb8SAndy Fiddaman /* dec constant qualifier */ 439*906afcb8SAndy Fiddaman { DEC2, { C_XXX }, BACK(T_DECIMAL), }, 440*906afcb8SAndy Fiddaman { DEC2, { C_LET, C_DEC }, BAD1, }, 441*906afcb8SAndy Fiddaman { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), }, 442*906afcb8SAndy Fiddaman 443*906afcb8SAndy Fiddaman /* saw ., operator or dbl constant */ 444*906afcb8SAndy Fiddaman { DOT1, { C_XXX }, S_CHRB, }, 445*906afcb8SAndy Fiddaman { DOT1, { '.' }, DOT2, }, 446*906afcb8SAndy Fiddaman { DOT1, { C_DEC }, DBL1, }, 447*906afcb8SAndy Fiddaman 448*906afcb8SAndy Fiddaman /* dbl fraction */ 449*906afcb8SAndy Fiddaman { DBL1, { C_XXX }, BACK(T_DOUBLE), }, 450*906afcb8SAndy Fiddaman { DBL1, { C_LET, '.' }, BAD1, }, 451*906afcb8SAndy Fiddaman { DBL1, { C_DEC }, DBL1, }, 452*906afcb8SAndy Fiddaman { DBL1, { 'e', 'E' }, DBL2, }, 453*906afcb8SAndy Fiddaman { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 454*906afcb8SAndy Fiddaman 455*906afcb8SAndy Fiddaman /* optional dbl exponent sign */ 456*906afcb8SAndy Fiddaman { DBL2, { C_XXX }, BACK(T_INVALID), }, 457*906afcb8SAndy Fiddaman { DBL2, { C_LET, '.' }, BAD1, }, 458*906afcb8SAndy Fiddaman { DBL2, { '+', '-' }, DBL3, }, 459*906afcb8SAndy Fiddaman { DBL2, { C_DEC }, DBL4, }, 460*906afcb8SAndy Fiddaman 461*906afcb8SAndy Fiddaman /* mandatory dbl exponent first digit */ 462*906afcb8SAndy Fiddaman { DBL3, { C_XXX }, BACK(T_INVALID), }, 463*906afcb8SAndy Fiddaman { DBL3, { C_LET, '.' }, BAD1, }, 464*906afcb8SAndy Fiddaman { DBL3, { C_DEC }, DBL4, }, 465*906afcb8SAndy Fiddaman 466*906afcb8SAndy Fiddaman /* dbl exponent digits */ 467*906afcb8SAndy Fiddaman { DBL4, { C_XXX }, BACK(T_DOUBLE), }, 468*906afcb8SAndy Fiddaman { DBL4, { C_LET, '.' }, BAD1, }, 469*906afcb8SAndy Fiddaman { DBL4, { C_DEC }, DBL4, }, 470*906afcb8SAndy Fiddaman { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 471*906afcb8SAndy Fiddaman 472*906afcb8SAndy Fiddaman /* dbl constant qualifier */ 473*906afcb8SAndy Fiddaman { DBL5, { C_XXX }, BACK(T_DOUBLE), }, 474*906afcb8SAndy Fiddaman { DBL5, { C_LET, '.' }, BAD1, }, 475*906afcb8SAndy Fiddaman { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), }, 476*906afcb8SAndy Fiddaman 477*906afcb8SAndy Fiddaman /* saw < starting include header */ 478*906afcb8SAndy Fiddaman { HDR1, { C_XXX }, HDR1, }, 479*906afcb8SAndy Fiddaman { HDR1, { '>', '\n', C_EOF }, S_LITEND, }, 480*906afcb8SAndy Fiddaman 481*906afcb8SAndy Fiddaman /* saw <binop><space> expecting = */ 482*906afcb8SAndy Fiddaman { BIN1, { C_XXX }, S_HUH, }, 483*906afcb8SAndy Fiddaman { BIN1, { ' ', '\t' }, BIN1, }, 484*906afcb8SAndy Fiddaman 485*906afcb8SAndy Fiddaman /* 2-char ops */ 486*906afcb8SAndy Fiddaman 487*906afcb8SAndy Fiddaman { SHARP1, { C_XXX }, S_SHARP, }, 488*906afcb8SAndy Fiddaman 489*906afcb8SAndy Fiddaman { PCT1, { C_XXX }, S_CHRB, }, 490*906afcb8SAndy Fiddaman { PCT1, { '=' }, KEEP(T_MODEQ), }, 491*906afcb8SAndy Fiddaman 492*906afcb8SAndy Fiddaman { AND1, { C_XXX }, S_CHRB, }, 493*906afcb8SAndy Fiddaman { AND1, { '=' }, KEEP(T_ANDEQ), }, 494*906afcb8SAndy Fiddaman { AND1, { '&' }, KEEP(T_ANDAND), }, 495*906afcb8SAndy Fiddaman 496*906afcb8SAndy Fiddaman { STAR1, { C_XXX }, S_CHRB, }, 497*906afcb8SAndy Fiddaman { STAR1, { '=' }, KEEP(T_MPYEQ), }, 498*906afcb8SAndy Fiddaman { STAR1, { '/' }, S_COMMENT, }, 499*906afcb8SAndy Fiddaman 500*906afcb8SAndy Fiddaman { PLUS1, { C_XXX }, S_CHRB, }, 501*906afcb8SAndy Fiddaman { PLUS1, { '=' }, KEEP(T_ADDEQ), }, 502*906afcb8SAndy Fiddaman { PLUS1, { '+' }, KEEP(T_ADDADD), }, 503*906afcb8SAndy Fiddaman 504*906afcb8SAndy Fiddaman { MINUS1, { C_XXX }, S_CHRB, }, 505*906afcb8SAndy Fiddaman { MINUS1, { '=' }, KEEP(T_SUBEQ), }, 506*906afcb8SAndy Fiddaman { MINUS1, { '-' }, KEEP(T_SUBSUB), }, 507*906afcb8SAndy Fiddaman { MINUS1, { '>' }, KEEP(T_PTRMEM), }, 508*906afcb8SAndy Fiddaman 509*906afcb8SAndy Fiddaman { COLON1, { C_XXX }, S_CHRB, }, 510*906afcb8SAndy Fiddaman { COLON1, { '=', '>' }, S_HUH, }, 511*906afcb8SAndy Fiddaman 512*906afcb8SAndy Fiddaman { LT1, { C_XXX }, S_CHRB, }, 513*906afcb8SAndy Fiddaman { LT1, { '=' }, KEEP(T_LE), }, 514*906afcb8SAndy Fiddaman { LT1, { '<' }, LSH1, }, 515*906afcb8SAndy Fiddaman 516*906afcb8SAndy Fiddaman { EQ1, { C_XXX }, S_CHRB, }, 517*906afcb8SAndy Fiddaman { EQ1, { '=' }, KEEP(T_EQ), }, 518*906afcb8SAndy Fiddaman 519*906afcb8SAndy Fiddaman { NOT1, { C_XXX }, S_CHRB, }, 520*906afcb8SAndy Fiddaman { NOT1, { '=' }, KEEP(T_NE), }, 521*906afcb8SAndy Fiddaman 522*906afcb8SAndy Fiddaman { GT1, { C_XXX }, S_CHRB, }, 523*906afcb8SAndy Fiddaman { GT1, { '=' }, KEEP(T_GE), }, 524*906afcb8SAndy Fiddaman { GT1, { '>' }, RSH1, }, 525*906afcb8SAndy Fiddaman 526*906afcb8SAndy Fiddaman { CIRC1, { C_XXX }, S_CHRB, }, 527*906afcb8SAndy Fiddaman { CIRC1, { '=' }, KEEP(T_XOREQ), }, 528*906afcb8SAndy Fiddaman 529*906afcb8SAndy Fiddaman { OR1, { C_XXX }, S_CHRB, }, 530*906afcb8SAndy Fiddaman { OR1, { '=' }, KEEP(T_OREQ), }, 531*906afcb8SAndy Fiddaman { OR1, { '|' }, KEEP(T_OROR), }, 532*906afcb8SAndy Fiddaman 533*906afcb8SAndy Fiddaman /* 3-char ops */ 534*906afcb8SAndy Fiddaman 535*906afcb8SAndy Fiddaman { ARROW1, { C_XXX }, BACK(T_PTRMEM), }, 536*906afcb8SAndy Fiddaman { ARROW1, { '*' }, KEEP(T_PTRMEMREF), }, 537*906afcb8SAndy Fiddaman 538*906afcb8SAndy Fiddaman { LSH1, { C_XXX }, BACK(T_LSHIFT), }, 539*906afcb8SAndy Fiddaman { LSH1, { '=' }, KEEP(T_LSHIFTEQ), }, 540*906afcb8SAndy Fiddaman 541*906afcb8SAndy Fiddaman { RSH1, { C_XXX }, BACK(T_RSHIFT), }, 542*906afcb8SAndy Fiddaman { RSH1, { '=' }, KEEP(T_RSHIFTEQ), }, 543*906afcb8SAndy Fiddaman 544*906afcb8SAndy Fiddaman #endif 545*906afcb8SAndy Fiddaman 546*906afcb8SAndy Fiddaman /* end */ 547*906afcb8SAndy Fiddaman { OP, { 0 }, END, } 548*906afcb8SAndy Fiddaman }; 549*906afcb8SAndy Fiddaman 550*906afcb8SAndy Fiddaman short fsm[TERMINAL+1][MAX+1]; 551*906afcb8SAndy Fiddaman 552*906afcb8SAndy Fiddaman char trigraph[MAX+1]; 553*906afcb8SAndy Fiddaman 554*906afcb8SAndy Fiddaman #if PROTOMAIN 555*906afcb8SAndy Fiddaman static char spl[] = { '\\', '\r', 0 }; 556*906afcb8SAndy Fiddaman static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@"; 557*906afcb8SAndy Fiddaman #else 558*906afcb8SAndy Fiddaman static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 }; 559*906afcb8SAndy Fiddaman static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; 560*906afcb8SAndy Fiddaman #endif 561*906afcb8SAndy Fiddaman static char* let = &aln[10]; 562*906afcb8SAndy Fiddaman static char hex[] = "fedcbaFEDCBA9876543210"; 563*906afcb8SAndy Fiddaman static char* dec = &hex[12]; 564*906afcb8SAndy Fiddaman static char* oct = &hex[14]; 565*906afcb8SAndy Fiddaman 566*906afcb8SAndy Fiddaman /* 567*906afcb8SAndy Fiddaman * runtime FSM modifications 568*906afcb8SAndy Fiddaman * ppfsm(FSM_INIT,0) must be called first 569*906afcb8SAndy Fiddaman */ 570*906afcb8SAndy Fiddaman 571*906afcb8SAndy Fiddaman void 572*906afcb8SAndy Fiddaman ppfsm(int op, register char* s) 573*906afcb8SAndy Fiddaman { 574*906afcb8SAndy Fiddaman register int c; 575*906afcb8SAndy Fiddaman register int n; 576*906afcb8SAndy Fiddaman register int i; 577*906afcb8SAndy Fiddaman register short* rp; 578*906afcb8SAndy Fiddaman register struct fsminit* fp; 579*906afcb8SAndy Fiddaman #if !PROTOMAIN 580*906afcb8SAndy Fiddaman char* t; 581*906afcb8SAndy Fiddaman int x; 582*906afcb8SAndy Fiddaman #endif 583*906afcb8SAndy Fiddaman 584*906afcb8SAndy Fiddaman switch (op) 585*906afcb8SAndy Fiddaman { 586*906afcb8SAndy Fiddaman 587*906afcb8SAndy Fiddaman #if !PROTOMAIN 588*906afcb8SAndy Fiddaman 589*906afcb8SAndy Fiddaman case FSM_IDADD: 590*906afcb8SAndy Fiddaman while (c = *s++) 591*906afcb8SAndy Fiddaman if (!ppisid(c)) 592*906afcb8SAndy Fiddaman { 593*906afcb8SAndy Fiddaman if (fsm[TOKEN][c] == ~S_HUH) 594*906afcb8SAndy Fiddaman { 595*906afcb8SAndy Fiddaman setid(c); 596*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 597*906afcb8SAndy Fiddaman fsm[i][c] = IDSTATE(fsm[i]['_']); 598*906afcb8SAndy Fiddaman } 599*906afcb8SAndy Fiddaman else error(2, "%c: cannot add to identifier set", c); 600*906afcb8SAndy Fiddaman } 601*906afcb8SAndy Fiddaman break; 602*906afcb8SAndy Fiddaman 603*906afcb8SAndy Fiddaman case FSM_IDDEL: 604*906afcb8SAndy Fiddaman while (c = *s++) 605*906afcb8SAndy Fiddaman if (ppisid(c)) 606*906afcb8SAndy Fiddaman { 607*906afcb8SAndy Fiddaman clrid(c); 608*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 609*906afcb8SAndy Fiddaman fsm[i][c] = ~S_HUH; 610*906afcb8SAndy Fiddaman } 611*906afcb8SAndy Fiddaman break; 612*906afcb8SAndy Fiddaman 613*906afcb8SAndy Fiddaman #endif 614*906afcb8SAndy Fiddaman 615*906afcb8SAndy Fiddaman case FSM_INIT: 616*906afcb8SAndy Fiddaman for (fp = fsminit;; fp++) 617*906afcb8SAndy Fiddaman { 618*906afcb8SAndy Fiddaman if ((n = fp->nextstate) >= TERMINAL) n = ~n; 619*906afcb8SAndy Fiddaman if (fp->state == OP) 620*906afcb8SAndy Fiddaman { 621*906afcb8SAndy Fiddaman #if !PROTOMAIN 622*906afcb8SAndy Fiddaman switch (n) 623*906afcb8SAndy Fiddaman { 624*906afcb8SAndy Fiddaman case COPY: 625*906afcb8SAndy Fiddaman c = fp->ch[0]; 626*906afcb8SAndy Fiddaman n = fp->ch[2]; 627*906afcb8SAndy Fiddaman for (i = fp->ch[1]; i <= n; i++) 628*906afcb8SAndy Fiddaman copy(i, c); 629*906afcb8SAndy Fiddaman continue; 630*906afcb8SAndy Fiddaman default: 631*906afcb8SAndy Fiddaman break; 632*906afcb8SAndy Fiddaman } 633*906afcb8SAndy Fiddaman #endif 634*906afcb8SAndy Fiddaman break; 635*906afcb8SAndy Fiddaman } 636*906afcb8SAndy Fiddaman rp = fsm[fp->state]; 637*906afcb8SAndy Fiddaman for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++) 638*906afcb8SAndy Fiddaman { 639*906afcb8SAndy Fiddaman switch (c) 640*906afcb8SAndy Fiddaman { 641*906afcb8SAndy Fiddaman case C_XXX: 642*906afcb8SAndy Fiddaman for (c = 0; c <= MAX; c++) 643*906afcb8SAndy Fiddaman rp[c] = n; 644*906afcb8SAndy Fiddaman /*FALLTHROUGH*/ 645*906afcb8SAndy Fiddaman 646*906afcb8SAndy Fiddaman case C_EOF: 647*906afcb8SAndy Fiddaman fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n; 648*906afcb8SAndy Fiddaman continue; 649*906afcb8SAndy Fiddaman 650*906afcb8SAndy Fiddaman case C_LET: 651*906afcb8SAndy Fiddaman s = let; 652*906afcb8SAndy Fiddaman break; 653*906afcb8SAndy Fiddaman 654*906afcb8SAndy Fiddaman case C_HEX: 655*906afcb8SAndy Fiddaman s = hex; 656*906afcb8SAndy Fiddaman break; 657*906afcb8SAndy Fiddaman 658*906afcb8SAndy Fiddaman case C_DEC: 659*906afcb8SAndy Fiddaman s = dec; 660*906afcb8SAndy Fiddaman break; 661*906afcb8SAndy Fiddaman 662*906afcb8SAndy Fiddaman case C_OCT: 663*906afcb8SAndy Fiddaman s = oct; 664*906afcb8SAndy Fiddaman break; 665*906afcb8SAndy Fiddaman 666*906afcb8SAndy Fiddaman default: 667*906afcb8SAndy Fiddaman rp[c] = n; 668*906afcb8SAndy Fiddaman continue; 669*906afcb8SAndy Fiddaman } 670*906afcb8SAndy Fiddaman while (c = *s++) 671*906afcb8SAndy Fiddaman rp[c] = n; 672*906afcb8SAndy Fiddaman } 673*906afcb8SAndy Fiddaman } 674*906afcb8SAndy Fiddaman 675*906afcb8SAndy Fiddaman /* 676*906afcb8SAndy Fiddaman * install splice special cases 677*906afcb8SAndy Fiddaman * and same non-terminal transitions 678*906afcb8SAndy Fiddaman */ 679*906afcb8SAndy Fiddaman 680*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 681*906afcb8SAndy Fiddaman { 682*906afcb8SAndy Fiddaman rp = fsm[i]; 683*906afcb8SAndy Fiddaman s = spl; 684*906afcb8SAndy Fiddaman while (c = *s++) 685*906afcb8SAndy Fiddaman if (c != MARK || !INCOMMENT(rp)) 686*906afcb8SAndy Fiddaman { 687*906afcb8SAndy Fiddaman if (rp[c] >= 0) rp[c] = ~rp[c]; 688*906afcb8SAndy Fiddaman rp[c] &= ~SPLICE; 689*906afcb8SAndy Fiddaman } 690*906afcb8SAndy Fiddaman rp[EOB] = ~S_EOB; 691*906afcb8SAndy Fiddaman for (c = 0; c <= MAX; c++) 692*906afcb8SAndy Fiddaman if (rp[c] == i) 693*906afcb8SAndy Fiddaman rp[c] = 0; 694*906afcb8SAndy Fiddaman } 695*906afcb8SAndy Fiddaman fsm[TERMINAL][0] = ~S_EOB; 696*906afcb8SAndy Fiddaman 697*906afcb8SAndy Fiddaman #if !PROTOMAIN 698*906afcb8SAndy Fiddaman 699*906afcb8SAndy Fiddaman /* 700*906afcb8SAndy Fiddaman * default character types 701*906afcb8SAndy Fiddaman */ 702*906afcb8SAndy Fiddaman 703*906afcb8SAndy Fiddaman s = let; 704*906afcb8SAndy Fiddaman while (c = *s++) 705*906afcb8SAndy Fiddaman setid(c); 706*906afcb8SAndy Fiddaman s = dec; 707*906afcb8SAndy Fiddaman while (c = *s++) 708*906afcb8SAndy Fiddaman setdig(c); 709*906afcb8SAndy Fiddaman s = spl; 710*906afcb8SAndy Fiddaman do setsplice(c = *s++); while (c); 711*906afcb8SAndy Fiddaman 712*906afcb8SAndy Fiddaman /* 713*906afcb8SAndy Fiddaman * trigraph map 714*906afcb8SAndy Fiddaman */ 715*906afcb8SAndy Fiddaman 716*906afcb8SAndy Fiddaman trigraph['='] = '#'; 717*906afcb8SAndy Fiddaman trigraph['('] = '['; 718*906afcb8SAndy Fiddaman trigraph['/'] = '\\'; 719*906afcb8SAndy Fiddaman trigraph[')'] = ']'; 720*906afcb8SAndy Fiddaman trigraph['\''] = '^'; 721*906afcb8SAndy Fiddaman trigraph['<'] = '{'; 722*906afcb8SAndy Fiddaman trigraph['!'] = '|'; 723*906afcb8SAndy Fiddaman trigraph['>'] = '}'; 724*906afcb8SAndy Fiddaman trigraph['-'] = '~'; 725*906afcb8SAndy Fiddaman #endif 726*906afcb8SAndy Fiddaman break; 727*906afcb8SAndy Fiddaman 728*906afcb8SAndy Fiddaman #if !PROTOMAIN 729*906afcb8SAndy Fiddaman 730*906afcb8SAndy Fiddaman case FSM_PLUSPLUS: 731*906afcb8SAndy Fiddaman if (pp.option & PLUSPLUS) 732*906afcb8SAndy Fiddaman { 733*906afcb8SAndy Fiddaman fsm[COLON1][':'] = ~KEEP(T_SCOPE); 734*906afcb8SAndy Fiddaman fsm[DOT1]['*'] = ~KEEP(T_DOTREF); 735*906afcb8SAndy Fiddaman fsm[MINUS1]['>'] = ARROW1; 736*906afcb8SAndy Fiddaman fsm[COM1]['/'] = COM5; 737*906afcb8SAndy Fiddaman t = "%<:"; 738*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 739*906afcb8SAndy Fiddaman { 740*906afcb8SAndy Fiddaman rp = fsm[i]; 741*906afcb8SAndy Fiddaman if (!INCOMMENT(rp) && !INQUOTE(rp)) 742*906afcb8SAndy Fiddaman { 743*906afcb8SAndy Fiddaman s = t; 744*906afcb8SAndy Fiddaman while (c = *s++) 745*906afcb8SAndy Fiddaman { 746*906afcb8SAndy Fiddaman if (rp[c] > 0) rp[c] = ~rp[c]; 747*906afcb8SAndy Fiddaman else if (!rp[c]) rp[c] = ~i; 748*906afcb8SAndy Fiddaman rp[c] &= ~SPLICE; 749*906afcb8SAndy Fiddaman } 750*906afcb8SAndy Fiddaman } 751*906afcb8SAndy Fiddaman } 752*906afcb8SAndy Fiddaman s = t; 753*906afcb8SAndy Fiddaman while (c = *s++) setsplice(c); 754*906afcb8SAndy Fiddaman } 755*906afcb8SAndy Fiddaman else 756*906afcb8SAndy Fiddaman { 757*906afcb8SAndy Fiddaman fsm[COLON1][':'] = ~S_CHRB; 758*906afcb8SAndy Fiddaman fsm[DOT1]['*'] = ~S_CHRB; 759*906afcb8SAndy Fiddaman fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM); 760*906afcb8SAndy Fiddaman fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB; 761*906afcb8SAndy Fiddaman } 762*906afcb8SAndy Fiddaman break; 763*906afcb8SAndy Fiddaman 764*906afcb8SAndy Fiddaman #if COMPATIBLE 765*906afcb8SAndy Fiddaman 766*906afcb8SAndy Fiddaman case FSM_COMPATIBILITY: 767*906afcb8SAndy Fiddaman if (pp.state & COMPATIBILITY) 768*906afcb8SAndy Fiddaman { 769*906afcb8SAndy Fiddaman fsm[HEX1]['e'] = HEX1; 770*906afcb8SAndy Fiddaman fsm[HEX1]['E'] = HEX1; 771*906afcb8SAndy Fiddaman fsm[QNUM]['e'] = QNUM; 772*906afcb8SAndy Fiddaman fsm[QNUM]['E'] = QNUM; 773*906afcb8SAndy Fiddaman fsm[QNUM]['u'] = ~QUAL(QNUM); 774*906afcb8SAndy Fiddaman fsm[QNUM]['U'] = ~QUAL(QNUM); 775*906afcb8SAndy Fiddaman } 776*906afcb8SAndy Fiddaman else 777*906afcb8SAndy Fiddaman { 778*906afcb8SAndy Fiddaman fsm[HEX1]['e'] = HEX3; 779*906afcb8SAndy Fiddaman fsm[HEX1]['E'] = HEX3; 780*906afcb8SAndy Fiddaman fsm[QNUM]['e'] = QEXP; 781*906afcb8SAndy Fiddaman fsm[QNUM]['E'] = QEXP; 782*906afcb8SAndy Fiddaman fsm[QNUM]['u'] = QNUM; 783*906afcb8SAndy Fiddaman fsm[QNUM]['U'] = QNUM; 784*906afcb8SAndy Fiddaman } 785*906afcb8SAndy Fiddaman break; 786*906afcb8SAndy Fiddaman 787*906afcb8SAndy Fiddaman #endif 788*906afcb8SAndy Fiddaman 789*906afcb8SAndy Fiddaman case FSM_QUOTADD: 790*906afcb8SAndy Fiddaman while (c = *s++) 791*906afcb8SAndy Fiddaman if (fsm[TOKEN][c] == ~S_HUH) 792*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 793*906afcb8SAndy Fiddaman fsm[i][c] = fsm[i]['"']; 794*906afcb8SAndy Fiddaman else error(2, "%c: cannot add to quote set", c); 795*906afcb8SAndy Fiddaman break; 796*906afcb8SAndy Fiddaman 797*906afcb8SAndy Fiddaman case FSM_QUOTDEL: 798*906afcb8SAndy Fiddaman while (c = *s++) 799*906afcb8SAndy Fiddaman if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"']) 800*906afcb8SAndy Fiddaman for (i = 0; i < TERMINAL; i++) 801*906afcb8SAndy Fiddaman fsm[i][c] = fsm[i]['_']; 802*906afcb8SAndy Fiddaman break; 803*906afcb8SAndy Fiddaman 804*906afcb8SAndy Fiddaman case FSM_OPSPACE: 805*906afcb8SAndy Fiddaman n = s ? BIN1 : ~S_CHRB; 806*906afcb8SAndy Fiddaman fsm[COM1][' '] = fsm[COM1]['\t'] = n; 807*906afcb8SAndy Fiddaman fsm[AND1][' '] = fsm[AND1]['\t'] = n; 808*906afcb8SAndy Fiddaman fsm[STAR1][' '] = fsm[STAR1]['\t'] = n; 809*906afcb8SAndy Fiddaman fsm[PCT1][' '] = fsm[PCT1]['\t'] = n; 810*906afcb8SAndy Fiddaman fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n; 811*906afcb8SAndy Fiddaman fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n; 812*906afcb8SAndy Fiddaman fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n; 813*906afcb8SAndy Fiddaman fsm[OR1][' '] = fsm[OR1]['\t'] = n; 814*906afcb8SAndy Fiddaman fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT); 815*906afcb8SAndy Fiddaman fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT); 816*906afcb8SAndy Fiddaman break; 817*906afcb8SAndy Fiddaman 818*906afcb8SAndy Fiddaman case FSM_MACRO: 819*906afcb8SAndy Fiddaman if (pp.truncate && strlen(s) >= pp.truncate) 820*906afcb8SAndy Fiddaman { 821*906afcb8SAndy Fiddaman x = s[pp.truncate]; 822*906afcb8SAndy Fiddaman s[pp.truncate] = 0; 823*906afcb8SAndy Fiddaman } 824*906afcb8SAndy Fiddaman else x = -1; 825*906afcb8SAndy Fiddaman i = MAC0 + ((c = *s++) != 'L'); 826*906afcb8SAndy Fiddaman if ((n = fsm[QUICK][c]) != (i + NMAC)) 827*906afcb8SAndy Fiddaman { 828*906afcb8SAndy Fiddaman n = i; 829*906afcb8SAndy Fiddaman if (!*s) n += NMAC; 830*906afcb8SAndy Fiddaman } 831*906afcb8SAndy Fiddaman if (fsm[QUICK][c] != n) 832*906afcb8SAndy Fiddaman fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n; 833*906afcb8SAndy Fiddaman if (c = *s++) 834*906afcb8SAndy Fiddaman { 835*906afcb8SAndy Fiddaman for (;;) 836*906afcb8SAndy Fiddaman { 837*906afcb8SAndy Fiddaman if ((i = n) < HIT0) 838*906afcb8SAndy Fiddaman { 839*906afcb8SAndy Fiddaman if (n < MACN) n++; 840*906afcb8SAndy Fiddaman if (!*s) 841*906afcb8SAndy Fiddaman { 842*906afcb8SAndy Fiddaman n += NMAC; 843*906afcb8SAndy Fiddaman break; 844*906afcb8SAndy Fiddaman } 845*906afcb8SAndy Fiddaman if (fsm[i][c] < HIT0) 846*906afcb8SAndy Fiddaman fsm[i][c] = n; 847*906afcb8SAndy Fiddaman if (fsm[i + NMAC][c] < HIT0) 848*906afcb8SAndy Fiddaman fsm[i + NMAC][c] = n; 849*906afcb8SAndy Fiddaman } 850*906afcb8SAndy Fiddaman else 851*906afcb8SAndy Fiddaman { 852*906afcb8SAndy Fiddaman if (n < HITN) n++; 853*906afcb8SAndy Fiddaman if (!*s) break; 854*906afcb8SAndy Fiddaman if (fsm[i][c] < HIT0) 855*906afcb8SAndy Fiddaman { 856*906afcb8SAndy Fiddaman n -= NMAC; 857*906afcb8SAndy Fiddaman fsm[i][c] = n; 858*906afcb8SAndy Fiddaman } 859*906afcb8SAndy Fiddaman } 860*906afcb8SAndy Fiddaman c = *s++; 861*906afcb8SAndy Fiddaman } 862*906afcb8SAndy Fiddaman if (x >= 0) 863*906afcb8SAndy Fiddaman { 864*906afcb8SAndy Fiddaman *s = x; 865*906afcb8SAndy Fiddaman for (n = CHAR_MIN; n <= CHAR_MAX; n++) 866*906afcb8SAndy Fiddaman if (ppisidig(n)) 867*906afcb8SAndy Fiddaman fsm[HITN][n] = HITN; 868*906afcb8SAndy Fiddaman n = HITN; 869*906afcb8SAndy Fiddaman } 870*906afcb8SAndy Fiddaman if (fsm[i][c] < n) 871*906afcb8SAndy Fiddaman fsm[i][c] = n; 872*906afcb8SAndy Fiddaman if (i < HIT0 && fsm[i + NMAC][c] < n) 873*906afcb8SAndy Fiddaman fsm[i + NMAC][c] = n; 874*906afcb8SAndy Fiddaman } 875*906afcb8SAndy Fiddaman break; 876*906afcb8SAndy Fiddaman 877*906afcb8SAndy Fiddaman #endif 878*906afcb8SAndy Fiddaman 879*906afcb8SAndy Fiddaman } 880*906afcb8SAndy Fiddaman } 881*906afcb8SAndy Fiddaman 882*906afcb8SAndy Fiddaman #if !PROTOMAIN 883*906afcb8SAndy Fiddaman 884*906afcb8SAndy Fiddaman /* 885*906afcb8SAndy Fiddaman * file buffer refill 886*906afcb8SAndy Fiddaman * c is current input char 887*906afcb8SAndy Fiddaman */ 888*906afcb8SAndy Fiddaman 889*906afcb8SAndy Fiddaman void 890*906afcb8SAndy Fiddaman refill(register int c) 891*906afcb8SAndy Fiddaman { 892*906afcb8SAndy Fiddaman if (pp.in->flags & IN_eof) 893*906afcb8SAndy Fiddaman { 894*906afcb8SAndy Fiddaman pp.in->nextchr--; 895*906afcb8SAndy Fiddaman c = 0; 896*906afcb8SAndy Fiddaman } 897*906afcb8SAndy Fiddaman else 898*906afcb8SAndy Fiddaman { 899*906afcb8SAndy Fiddaman *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c; 900*906afcb8SAndy Fiddaman c = 901*906afcb8SAndy Fiddaman #if PROTOTYPE 902*906afcb8SAndy Fiddaman (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) : 903*906afcb8SAndy Fiddaman #endif 904*906afcb8SAndy Fiddaman read(pp.in->fd, pp.in->nextchr, PPBUFSIZ); 905*906afcb8SAndy Fiddaman } 906*906afcb8SAndy Fiddaman if (c > 0) 907*906afcb8SAndy Fiddaman { 908*906afcb8SAndy Fiddaman if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline; 909*906afcb8SAndy Fiddaman else pp.in->flags &= ~IN_newline; 910*906afcb8SAndy Fiddaman #if PROTOTYPE 911*906afcb8SAndy Fiddaman if (!(pp.in->flags & IN_prototype)) 912*906afcb8SAndy Fiddaman #endif 913*906afcb8SAndy Fiddaman if (c < PPBUFSIZ && (pp.in->flags & IN_regular)) 914*906afcb8SAndy Fiddaman { 915*906afcb8SAndy Fiddaman pp.in->flags |= IN_eof; 916*906afcb8SAndy Fiddaman close(pp.in->fd); 917*906afcb8SAndy Fiddaman pp.in->fd = -1; 918*906afcb8SAndy Fiddaman } 919*906afcb8SAndy Fiddaman } 920*906afcb8SAndy Fiddaman else 921*906afcb8SAndy Fiddaman { 922*906afcb8SAndy Fiddaman if (c < 0) 923*906afcb8SAndy Fiddaman { 924*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|3, "read error"); 925*906afcb8SAndy Fiddaman c = 0; 926*906afcb8SAndy Fiddaman } 927*906afcb8SAndy Fiddaman else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c) 928*906afcb8SAndy Fiddaman { 929*906afcb8SAndy Fiddaman static char ket[] = { 0, '}', '\n', 0 }; 930*906afcb8SAndy Fiddaman 931*906afcb8SAndy Fiddaman pp.in->flags ^= IN_c; 932*906afcb8SAndy Fiddaman pp.in->nextchr = ket + 1; 933*906afcb8SAndy Fiddaman c = 2; 934*906afcb8SAndy Fiddaman } 935*906afcb8SAndy Fiddaman pp.in->flags |= IN_eof; 936*906afcb8SAndy Fiddaman } 937*906afcb8SAndy Fiddaman #if CHECKPOINT 938*906afcb8SAndy Fiddaman pp.in->buflen = c; 939*906afcb8SAndy Fiddaman #endif 940*906afcb8SAndy Fiddaman pp.in->nextchr[c] = 0; 941*906afcb8SAndy Fiddaman debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : "")); 942*906afcb8SAndy Fiddaman if (pp.test & 0x0080) 943*906afcb8SAndy Fiddaman sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file); 944*906afcb8SAndy Fiddaman } 945*906afcb8SAndy Fiddaman 946*906afcb8SAndy Fiddaman #endif 947