1da2e3ebdSchin /*********************************************************************** 2da2e3ebdSchin * * 3da2e3ebdSchin * This software is part of the ast package * 4*34f9b3eeSRoland Mainz * Copyright (c) 1986-2009 AT&T Intellectual Property * 5da2e3ebdSchin * and is licensed under the * 6da2e3ebdSchin * Common Public License, Version 1.0 * 77c2fbfb3SApril Chin * by AT&T Intellectual Property * 8da2e3ebdSchin * * 9da2e3ebdSchin * A copy of the License is available at * 10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt * 11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12da2e3ebdSchin * * 13da2e3ebdSchin * Information and Software Systems Research * 14da2e3ebdSchin * AT&T Research * 15da2e3ebdSchin * Florham Park NJ * 16da2e3ebdSchin * * 17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> * 18da2e3ebdSchin * * 19da2e3ebdSchin ***********************************************************************/ 20da2e3ebdSchin #pragma prototyped 21da2e3ebdSchin /* 22da2e3ebdSchin * Glenn Fowler 23da2e3ebdSchin * AT&T Research 24da2e3ebdSchin * 25da2e3ebdSchin * preprocessor lexical analyzer definitions 26da2e3ebdSchin */ 27da2e3ebdSchin 28da2e3ebdSchin #ifndef _PPFSM_H 29da2e3ebdSchin #define _PPFSM_H 30da2e3ebdSchin 31da2e3ebdSchin #define BITSTATE 16 /* bitsof(state) */ 32da2e3ebdSchin #define BITNONTERM 7 /* bitsof(non-terminal-state) */ 33da2e3ebdSchin #define BITTERM 7 /* bitsof(terminal-state) */ 34da2e3ebdSchin #define NMAC 19 /* number of MAC states */ 35da2e3ebdSchin 36da2e3ebdSchin #define SPLICE (1<<BITTERM) 37da2e3ebdSchin 38da2e3ebdSchin #define CODE(tok,act) ((((tok)-N_PP)<<(BITTERM+1))|(act)) 39da2e3ebdSchin #define TERM(st) ((st)&((1<<(BITTERM+1))-1)) 40da2e3ebdSchin #define NEXT(st) (((st)>>(BITTERM+1))&((1<<BITNONTERM)-1)) 41da2e3ebdSchin #define QUAL(st) (((st)<<(BITTERM+1))|(S_QUAL)) 42da2e3ebdSchin #define TYPE(st) (NEXT(st)+N_PP) 43da2e3ebdSchin 44da2e3ebdSchin #define BACK(tok) CODE(tok,S_TOKB) 45da2e3ebdSchin #define KEEP(tok) CODE(tok,S_TOK) 46da2e3ebdSchin 47da2e3ebdSchin #undef MAX 48da2e3ebdSchin #define MAX 255 49da2e3ebdSchin 50da2e3ebdSchin #undef EOB 51da2e3ebdSchin #define EOB 0 52da2e3ebdSchin #undef EOF 53da2e3ebdSchin #define EOF (MAX+1) 54da2e3ebdSchin 55da2e3ebdSchin /* 56da2e3ebdSchin * FSM states 57da2e3ebdSchin * 58da2e3ebdSchin * NOTE: preserve the ranges 59da2e3ebdSchin */ 60da2e3ebdSchin 61da2e3ebdSchin #define INDEX(p) (((p)-fsm[0])/(MAX+1)) 62da2e3ebdSchin 63da2e3ebdSchin #define IDSTATE(x) (((x)>=0&&INQMACRO(fsm[x]))?QID:(x)) 64da2e3ebdSchin 65da2e3ebdSchin #define INCOMMENT(p) ((p)>=fsm[COM2]&&(p)<=fsm[COM7]) 66da2e3ebdSchin #define INCOMMENTXX(p) ((p)>=fsm[COM5]&&(p)<=fsm[COM7]) 67da2e3ebdSchin #define INQMACRO(p) ((p)>=fsm[MAC0]&&(p)<=fsm[LIT0]) 68da2e3ebdSchin #define INTMACRO(p) ((p)>=fsm[NID]&&(p)<=fsm[LIT]) 69da2e3ebdSchin #define INQUOTE(p) ((p)>=fsm[LIT1]&&(p)<=fsm[LIT2]) 70da2e3ebdSchin #define INOPSPACE(p) ((p)==fsm[BIN1]) 71da2e3ebdSchin #define INSPACE(p) ((p)==fsm[WS1]) 72da2e3ebdSchin 73da2e3ebdSchin /* 74da2e3ebdSchin * proto non-terminal states 75da2e3ebdSchin */ 76da2e3ebdSchin 77da2e3ebdSchin #define PROTO 0 78da2e3ebdSchin #define RES1 (PROTO+1) 79da2e3ebdSchin #define RES1a (PROTO+2) 80da2e3ebdSchin #define RES1e (PROTO+3) 81da2e3ebdSchin #define RES1f (PROTO+4) 82da2e3ebdSchin #define RES1h (PROTO+5) 83da2e3ebdSchin #define RES1l (PROTO+6) 84da2e3ebdSchin #define RES1n (PROTO+7) 85da2e3ebdSchin #define RES1o (PROTO+8) 86da2e3ebdSchin #define RES1t (PROTO+9) 87da2e3ebdSchin #define RES1x (PROTO+10) 88da2e3ebdSchin #define RES1y (PROTO+11) 89da2e3ebdSchin #define COM1 (PROTO+12) 90da2e3ebdSchin #define COM2 (PROTO+13) 91da2e3ebdSchin #define COM3 (PROTO+14) 92da2e3ebdSchin #define COM4 (PROTO+15) 93da2e3ebdSchin #define COM5 (PROTO+16) 94da2e3ebdSchin #define COM6 (PROTO+17) 95da2e3ebdSchin #define COM7 (PROTO+18) 96da2e3ebdSchin #define NID (PROTO+19) 97da2e3ebdSchin #define LIT (PROTO+20) 98da2e3ebdSchin #define LIT1 (PROTO+21) 99da2e3ebdSchin #define LIT2 (PROTO+22) 100da2e3ebdSchin #define BAD1 (PROTO+23) 101da2e3ebdSchin #define BAD2 (PROTO+24) 102da2e3ebdSchin #define DOT (PROTO+25) 103da2e3ebdSchin #define DOT2 (PROTO+26) 104da2e3ebdSchin #define WS1 (PROTO+27) 105da2e3ebdSchin 106da2e3ebdSchin #if PROTOMAIN 107da2e3ebdSchin 108da2e3ebdSchin #define TERMINAL (PROTO+28) /* PROTOMAIN */ 109da2e3ebdSchin 110da2e3ebdSchin #else 111da2e3ebdSchin 112da2e3ebdSchin /* 113da2e3ebdSchin * quick non-terminal states 114da2e3ebdSchin */ 115da2e3ebdSchin 116da2e3ebdSchin #define QUICK (PROTO+28) 117da2e3ebdSchin #define QTOK (QUICK+1) 118da2e3ebdSchin #define QNUM (QUICK+2) 119da2e3ebdSchin #define QEXP (QUICK+3) 120da2e3ebdSchin #define QCOM (QUICK+4) 121da2e3ebdSchin #define QID (QUICK+5) 122da2e3ebdSchin #define MAC0 (QUICK+6) 123da2e3ebdSchin #define MACN (MAC0+NMAC-1) 124da2e3ebdSchin #define HIT0 (MACN+1) 125da2e3ebdSchin #define HITN (HIT0+NMAC-1) 126da2e3ebdSchin #define LIT0 (HITN+1) 127da2e3ebdSchin #define SHARP1 (HITN+2) 128da2e3ebdSchin 129da2e3ebdSchin /* 130da2e3ebdSchin * tokenize non-terminal states 131da2e3ebdSchin */ 132da2e3ebdSchin 133da2e3ebdSchin #define TOKEN (HITN+3) 134da2e3ebdSchin #define OCT1 (TOKEN+1) 135da2e3ebdSchin #define OCT2 (TOKEN+2) 136da2e3ebdSchin #define OCT3 (TOKEN+3) 137da2e3ebdSchin #define NOT1 (TOKEN+4) 138da2e3ebdSchin #define PCT1 (TOKEN+5) 139da2e3ebdSchin #define AND1 (TOKEN+6) 140da2e3ebdSchin #define STAR1 (TOKEN+7) 141da2e3ebdSchin #define PLUS1 (TOKEN+8) 142da2e3ebdSchin #define MINUS1 (TOKEN+9) 143da2e3ebdSchin #define ARROW1 (TOKEN+10) 144da2e3ebdSchin #define COLON1 (TOKEN+11) 145da2e3ebdSchin #define LT1 (TOKEN+12) 146da2e3ebdSchin #define LSH1 (TOKEN+13) 147da2e3ebdSchin #define EQ1 (TOKEN+14) 148da2e3ebdSchin #define RSH1 (TOKEN+15) 149da2e3ebdSchin #define GT1 (TOKEN+16) 150da2e3ebdSchin #define CIRC1 (TOKEN+17) 151da2e3ebdSchin #define OR1 (TOKEN+18) 152da2e3ebdSchin #define DEC1 (TOKEN+19) 153da2e3ebdSchin #define DEC2 (TOKEN+20) 154da2e3ebdSchin #define HEX1 (TOKEN+21) 155da2e3ebdSchin #define HEX2 (TOKEN+22) 156da2e3ebdSchin #define HEX3 (TOKEN+23) 157da2e3ebdSchin #define HEX4 (TOKEN+24) 158da2e3ebdSchin #define HEX5 (TOKEN+25) 159da2e3ebdSchin #define HEX6 (TOKEN+26) 160da2e3ebdSchin #define HEX7 (TOKEN+27) 161da2e3ebdSchin #define HEX8 (TOKEN+28) 162da2e3ebdSchin #define DBL1 (TOKEN+29) 163da2e3ebdSchin #define DBL2 (TOKEN+30) 164da2e3ebdSchin #define DBL3 (TOKEN+31) 165da2e3ebdSchin #define DBL4 (TOKEN+32) 166da2e3ebdSchin #define DBL5 (TOKEN+33) 167da2e3ebdSchin #define DOT1 (TOKEN+34) 168da2e3ebdSchin #define HDR1 (TOKEN+35) 169da2e3ebdSchin #define BIN1 (TOKEN+36) 170da2e3ebdSchin 171da2e3ebdSchin #define TERMINAL (TOKEN+37) 172da2e3ebdSchin 173da2e3ebdSchin #endif 174da2e3ebdSchin 175da2e3ebdSchin /* 176da2e3ebdSchin * quick terminal states grouped together 177da2e3ebdSchin */ 178da2e3ebdSchin 179da2e3ebdSchin #define S_CHRB (TERMINAL+0) 180da2e3ebdSchin #define S_COMMENT (TERMINAL+1) 181da2e3ebdSchin #define S_EOB (TERMINAL+2) 182da2e3ebdSchin #define S_LITBEG (TERMINAL+3) 183da2e3ebdSchin #define S_LITEND (TERMINAL+4) 184da2e3ebdSchin #define S_LITESC (TERMINAL+5) 185da2e3ebdSchin #define S_MACRO (TERMINAL+6) 186da2e3ebdSchin #define S_NL (TERMINAL+7) 187da2e3ebdSchin #define S_QUAL (TERMINAL+8) 188da2e3ebdSchin #define S_SHARP (TERMINAL+9) 189da2e3ebdSchin #define S_VS (TERMINAL+10) 190da2e3ebdSchin 191da2e3ebdSchin /* 192da2e3ebdSchin * and the remaining terminal states 193da2e3ebdSchin */ 194da2e3ebdSchin 195da2e3ebdSchin #define S_CHR (TERMINAL+11) 196da2e3ebdSchin #define S_HUH (TERMINAL+12) 197da2e3ebdSchin #define S_TOK (TERMINAL+13) 198da2e3ebdSchin #define S_TOKB (TERMINAL+14) 199da2e3ebdSchin #define S_WS (TERMINAL+15) 200da2e3ebdSchin 201da2e3ebdSchin #define S_RESERVED (S_HUH) 202da2e3ebdSchin 203da2e3ebdSchin /* 204da2e3ebdSchin * the last terminal state (for tracing) 205da2e3ebdSchin */ 206da2e3ebdSchin 207da2e3ebdSchin #define LAST (S_WS) 208da2e3ebdSchin 209da2e3ebdSchin /* 210da2e3ebdSchin * pseudo terminal states 211da2e3ebdSchin */ 212da2e3ebdSchin 213da2e3ebdSchin #define S_EOF (0) 214da2e3ebdSchin 215da2e3ebdSchin /* 216da2e3ebdSchin * common lex macros 217da2e3ebdSchin * 218da2e3ebdSchin * NOTE: common local variable names assumed 219da2e3ebdSchin */ 220da2e3ebdSchin 221da2e3ebdSchin #define GET(p,c,tp,xp) \ 222da2e3ebdSchin do \ 223da2e3ebdSchin { \ 224da2e3ebdSchin if ((c = GETCHR()) == EOB && pp.in->type == IN_FILE) \ 225da2e3ebdSchin FGET(p, c, tp, xp); \ 226da2e3ebdSchin } while (0) 227da2e3ebdSchin 228da2e3ebdSchin #define FGET(p,c,tp,xp) \ 229da2e3ebdSchin do \ 230da2e3ebdSchin { \ 231da2e3ebdSchin if (op > xp + PPTOKSIZ) \ 232da2e3ebdSchin { \ 233da2e3ebdSchin if (!INCOMMENT(rp) && !(pp.state & (NOTEXT|SKIPCONTROL))) \ 234da2e3ebdSchin error(2, "long token truncated"); \ 235da2e3ebdSchin op = xp + PPTOKSIZ; \ 236da2e3ebdSchin } \ 237da2e3ebdSchin if ((pp.in->flags & IN_flush) && pp.level == 1 && !INMACRO(rp) && (!pp.comment || !INCOMMENT(rp)) && (c = op - pp.outbuf) > 0 && *(op - 1) == '\n') \ 238da2e3ebdSchin { \ 239da2e3ebdSchin PPWRITE(c); \ 240da2e3ebdSchin op = tp = pp.outp = pp.outbuf; \ 241da2e3ebdSchin } \ 242da2e3ebdSchin SYNCIN(); \ 243da2e3ebdSchin refill(p); \ 244da2e3ebdSchin CACHEIN(); \ 245da2e3ebdSchin if ((c = GETCHR()) == EOB) BACKIN(); \ 246da2e3ebdSchin } while (0) 247da2e3ebdSchin 248da2e3ebdSchin #define POP() \ 249da2e3ebdSchin do \ 250da2e3ebdSchin { \ 251da2e3ebdSchin debug((-7, "POP in=%s next=%s state=%s", ppinstr(cur), pptokchr(*prv->nextchr), pplexstr(INDEX(rp)))); \ 252da2e3ebdSchin ip = (pp.in = prv)->nextchr; \ 253da2e3ebdSchin } while (0) 254da2e3ebdSchin 255da2e3ebdSchin /* 256da2e3ebdSchin * fsm implementaion globals 257da2e3ebdSchin */ 258da2e3ebdSchin 259da2e3ebdSchin #define fsm _pp_fsmtab 260da2e3ebdSchin #define refill _pp_refill 261da2e3ebdSchin #define trigraph _pp_trigraph 262da2e3ebdSchin 263da2e3ebdSchin /* 264da2e3ebdSchin * first index is state, second is char, value is next state 265da2e3ebdSchin * except for fsm[TERMINAL] where second is state+1 for EOF transition 266da2e3ebdSchin */ 267da2e3ebdSchin 268da2e3ebdSchin extern short fsm[TERMINAL+1][MAX+1]; 269da2e3ebdSchin 270da2e3ebdSchin /* 271da2e3ebdSchin * the index is char, value is trigraph value for <?><?><char>, 0 if invalid 272da2e3ebdSchin */ 273da2e3ebdSchin 274da2e3ebdSchin extern char trigraph[MAX+1]; 275da2e3ebdSchin 276da2e3ebdSchin extern void refill(int); 277da2e3ebdSchin 278da2e3ebdSchin #endif 279