1 /* 2 ** This file contains all sources (including headers) to the LEMON 3 ** LALR(1) parser generator. The sources have been combined into a 4 ** single file to make it easy to include LEMON in the source tree 5 ** and Makefile of another program. 6 ** 7 ** The author of this program disclaims copyright. 8 */ 9 #include <stdio.h> 10 #include <stdarg.h> 11 #include <string.h> 12 #include <ctype.h> 13 #include <stdlib.h> 14 15 #ifndef __WIN32__ 16 # if defined(_WIN32) || defined(WIN32) 17 # define __WIN32__ 18 # endif 19 #endif 20 21 /* #define PRIVATE static */ 22 #define PRIVATE 23 24 #ifdef TEST 25 #define MAXRHS 5 /* Set low to exercise exception code */ 26 #else 27 #define MAXRHS 1000 28 #endif 29 30 char *msort(); 31 extern void *malloc(); 32 33 /******** From the file "action.h" *************************************/ 34 struct action *Action_new(); 35 struct action *Action_sort(); 36 37 /********* From the file "assert.h" ************************************/ 38 void myassert(); 39 #ifndef NDEBUG 40 # define assert(X) if(!(X))myassert(__FILE__,__LINE__) 41 #else 42 # define assert(X) 43 #endif 44 45 /********** From the file "build.h" ************************************/ 46 void FindRulePrecedences(); 47 void FindFirstSets(); 48 void FindStates(); 49 void FindLinks(); 50 void FindFollowSets(); 51 void FindActions(); 52 53 /********* From the file "configlist.h" *********************************/ 54 void Configlist_init(/* void */); 55 struct config *Configlist_add(/* struct rule *, int */); 56 struct config *Configlist_addbasis(/* struct rule *, int */); 57 void Configlist_closure(/* void */); 58 void Configlist_sort(/* void */); 59 void Configlist_sortbasis(/* void */); 60 struct config *Configlist_return(/* void */); 61 struct config *Configlist_basis(/* void */); 62 void Configlist_eat(/* struct config * */); 63 void Configlist_reset(/* void */); 64 65 /********* From the file "error.h" ***************************************/ 66 void ErrorMsg(const char *, int,const char *, ...); 67 68 /****** From the file "option.h" ******************************************/ 69 struct s_options { 70 enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 71 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type; 72 char *label; 73 char *arg; 74 char *message; 75 }; 76 int OptInit(/* char**,struct s_options*,FILE* */); 77 int OptNArgs(/* void */); 78 char *OptArg(/* int */); 79 void OptErr(/* int */); 80 void OptPrint(/* void */); 81 82 /******** From the file "parse.h" *****************************************/ 83 void Parse(/* struct lemon *lemp */); 84 85 /********* From the file "plink.h" ***************************************/ 86 struct plink *Plink_new(/* void */); 87 void Plink_add(/* struct plink **, struct config * */); 88 void Plink_copy(/* struct plink **, struct plink * */); 89 void Plink_delete(/* struct plink * */); 90 91 /********** From the file "report.h" *************************************/ 92 void Reprint(/* struct lemon * */); 93 void ReportOutput(/* struct lemon * */); 94 void ReportTable(/* struct lemon * */); 95 void ReportHeader(/* struct lemon * */); 96 void CompressTables(/* struct lemon * */); 97 98 /********** From the file "set.h" ****************************************/ 99 void SetSize(/* int N */); /* All sets will be of size N */ 100 char *SetNew(/* void */); /* A new set for element 0..N */ 101 void SetFree(/* char* */); /* Deallocate a set */ 102 103 int SetAdd(/* char*,int */); /* Add element to a set */ 104 int SetUnion(/* char *A,char *B */); /* A <- A U B, thru element N */ 105 106 #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ 107 108 /********** From the file "struct.h" *************************************/ 109 /* 110 ** Principal data structures for the LEMON parser generator. 111 */ 112 113 typedef enum {B_FALSE=0, B_TRUE} Boolean; 114 115 /* Symbols (terminals and nonterminals) of the grammar are stored 116 ** in the following: */ 117 struct symbol { 118 char *name; /* Name of the symbol */ 119 int index; /* Index number for this symbol */ 120 enum { 121 TERMINAL, 122 NONTERMINAL 123 } type; /* Symbols are all either TERMINALS or NTs */ 124 struct rule *rule; /* Linked list of rules of this (if an NT) */ 125 struct symbol *fallback; /* fallback token in case this token doesn't parse */ 126 int prec; /* Precedence if defined (-1 otherwise) */ 127 enum e_assoc { 128 LEFT, 129 RIGHT, 130 NONE, 131 UNK 132 } assoc; /* Associativity if predecence is defined */ 133 char *firstset; /* First-set for all rules of this symbol */ 134 Boolean lambda; /* True if NT and can generate an empty string */ 135 char *destructor; /* Code which executes whenever this symbol is 136 ** popped from the stack during error processing */ 137 int destructorln; /* Line number of destructor code */ 138 char *datatype; /* The data type of information held by this 139 ** object. Only used if type==NONTERMINAL */ 140 int dtnum; /* The data type number. In the parser, the value 141 ** stack is a union. The .yy%d element of this 142 ** union is the correct data type for this object */ 143 }; 144 145 /* Each production rule in the grammar is stored in the following 146 ** structure. */ 147 struct rule { 148 struct symbol *lhs; /* Left-hand side of the rule */ 149 char *lhsalias; /* Alias for the LHS (NULL if none) */ 150 int ruleline; /* Line number for the rule */ 151 int nrhs; /* Number of RHS symbols */ 152 struct symbol **rhs; /* The RHS symbols */ 153 char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ 154 int line; /* Line number at which code begins */ 155 char *code; /* The code executed when this rule is reduced */ 156 struct symbol *precsym; /* Precedence symbol for this rule */ 157 int index; /* An index number for this rule */ 158 Boolean canReduce; /* True if this rule is ever reduced */ 159 struct rule *nextlhs; /* Next rule with the same LHS */ 160 struct rule *next; /* Next rule in the global list */ 161 }; 162 163 /* A configuration is a production rule of the grammar together with 164 ** a mark (dot) showing how much of that rule has been processed so far. 165 ** Configurations also contain a follow-set which is a list of terminal 166 ** symbols which are allowed to immediately follow the end of the rule. 167 ** Every configuration is recorded as an instance of the following: */ 168 struct config { 169 struct rule *rp; /* The rule upon which the configuration is based */ 170 int dot; /* The parse point */ 171 char *fws; /* Follow-set for this configuration only */ 172 struct plink *fplp; /* Follow-set forward propagation links */ 173 struct plink *bplp; /* Follow-set backwards propagation links */ 174 struct state *stp; /* Pointer to state which contains this */ 175 enum { 176 COMPLETE, /* The status is used during followset and */ 177 INCOMPLETE /* shift computations */ 178 } status; 179 struct config *next; /* Next configuration in the state */ 180 struct config *bp; /* The next basis configuration */ 181 }; 182 183 /* Every shift or reduce operation is stored as one of the following */ 184 struct action { 185 struct symbol *sp; /* The look-ahead symbol */ 186 enum e_action { 187 SHIFT, 188 ACCEPT, 189 REDUCE, 190 ERROR, 191 CONFLICT, /* Was a reduce, but part of a conflict */ 192 SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ 193 RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ 194 NOT_USED /* Deleted by compression */ 195 } type; 196 union { 197 struct state *stp; /* The new state, if a shift */ 198 struct rule *rp; /* The rule, if a reduce */ 199 } x; 200 struct action *next; /* Next action for this state */ 201 struct action *collide; /* Next action with the same hash */ 202 }; 203 204 /* Each state of the generated parser's finite state machine 205 ** is encoded as an instance of the following structure. */ 206 struct state { 207 struct config *bp; /* The basis configurations for this state */ 208 struct config *cfp; /* All configurations in this set */ 209 int index; /* Sequencial number for this state */ 210 struct action *ap; /* Array of actions for this state */ 211 int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ 212 int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ 213 int iDflt; /* Default action */ 214 }; 215 #define NO_OFFSET (-2147483647) 216 217 /* A followset propagation link indicates that the contents of one 218 ** configuration followset should be propagated to another whenever 219 ** the first changes. */ 220 struct plink { 221 struct config *cfp; /* The configuration to which linked */ 222 struct plink *next; /* The next propagate link */ 223 }; 224 225 /* The state vector for the entire parser generator is recorded as 226 ** follows. (LEMON uses no global variables and makes little use of 227 ** static variables. Fields in the following structure can be thought 228 ** of as begin global variables in the program.) */ 229 struct lemon { 230 struct state **sorted; /* Table of states sorted by state number */ 231 struct rule *rule; /* List of all rules */ 232 int nstate; /* Number of states */ 233 int nrule; /* Number of rules */ 234 int nsymbol; /* Number of terminal and nonterminal symbols */ 235 int nterminal; /* Number of terminal symbols */ 236 struct symbol **symbols; /* Sorted array of pointers to symbols */ 237 int errorcnt; /* Number of errors */ 238 struct symbol *errsym; /* The error symbol */ 239 char *name; /* Name of the generated parser */ 240 char *arg; /* Declaration of the 3th argument to parser */ 241 char *tokentype; /* Type of terminal symbols in the parser stack */ 242 char *vartype; /* The default type of non-terminal symbols */ 243 char *start; /* Name of the start symbol for the grammar */ 244 char *stacksize; /* Size of the parser stack */ 245 char *include; /* Code to put at the start of the C file */ 246 int includeln; /* Line number for start of include code */ 247 char *error; /* Code to execute when an error is seen */ 248 int errorln; /* Line number for start of error code */ 249 char *overflow; /* Code to execute on a stack overflow */ 250 int overflowln; /* Line number for start of overflow code */ 251 char *failure; /* Code to execute on parser failure */ 252 int failureln; /* Line number for start of failure code */ 253 char *accept; /* Code to execute when the parser excepts */ 254 int acceptln; /* Line number for the start of accept code */ 255 char *extracode; /* Code appended to the generated file */ 256 int extracodeln; /* Line number for the start of the extra code */ 257 char *tokendest; /* Code to execute to destroy token data */ 258 int tokendestln; /* Line number for token destroyer code */ 259 char *vardest; /* Code for the default non-terminal destructor */ 260 int vardestln; /* Line number for default non-term destructor code*/ 261 char *filename; /* Name of the input file */ 262 char *outname; /* Name of the current output file */ 263 char *tokenprefix; /* A prefix added to token names in the .h file */ 264 int nconflict; /* Number of parsing conflicts */ 265 int tablesize; /* Size of the parse tables */ 266 int basisflag; /* Print only basis configurations */ 267 int has_fallback; /* True if any %fallback is seen in the grammer */ 268 char *argv0; /* Name of the program */ 269 }; 270 271 #define MemoryCheck(X) if((X)==0){ \ 272 extern void memory_error(); \ 273 memory_error(); \ 274 } 275 276 /**************** From the file "table.h" *********************************/ 277 /* 278 ** All code in this file has been automatically generated 279 ** from a specification in the file 280 ** "table.q" 281 ** by the associative array code building program "aagen". 282 ** Do not edit this file! Instead, edit the specification 283 ** file, then rerun aagen. 284 */ 285 /* 286 ** Code for processing tables in the LEMON parser generator. 287 */ 288 289 /* Routines for handling a strings */ 290 291 char *Strsafe(); 292 293 void Strsafe_init(/* void */); 294 int Strsafe_insert(/* char * */); 295 char *Strsafe_find(/* char * */); 296 297 /* Routines for handling symbols of the grammar */ 298 299 struct symbol *Symbol_new(); 300 int Symbolcmpp(/* struct symbol **, struct symbol ** */); 301 void Symbol_init(/* void */); 302 int Symbol_insert(/* struct symbol *, char * */); 303 struct symbol *Symbol_find(/* char * */); 304 struct symbol *Symbol_Nth(/* int */); 305 int Symbol_count(/* */); 306 struct symbol **Symbol_arrayof(/* */); 307 308 /* Routines to manage the state table */ 309 310 int Configcmp(/* struct config *, struct config * */); 311 struct state *State_new(); 312 void State_init(/* void */); 313 int State_insert(/* struct state *, struct config * */); 314 struct state *State_find(/* struct config * */); 315 struct state **State_arrayof(/* */); 316 317 /* Routines used for efficiency in Configlist_add */ 318 319 void Configtable_init(/* void */); 320 int Configtable_insert(/* struct config * */); 321 struct config *Configtable_find(/* struct config * */); 322 void Configtable_clear(/* int(*)(struct config *) */); 323 /****************** From the file "action.c" *******************************/ 324 /* 325 ** Routines processing parser actions in the LEMON parser generator. 326 */ 327 328 /* Allocate a new parser action */ 329 struct action *Action_new(){ 330 static struct action *freelist = 0; 331 struct action *new; 332 333 if( freelist==0 ){ 334 int i; 335 int amt = 100; 336 freelist = (struct action *)malloc( sizeof(struct action)*amt ); 337 if( freelist==0 ){ 338 fprintf(stderr,"Unable to allocate memory for a new parser action."); 339 exit(1); 340 } 341 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 342 freelist[amt-1].next = 0; 343 } 344 new = freelist; 345 freelist = freelist->next; 346 return new; 347 } 348 349 /* Compare two actions */ 350 static int actioncmp(ap1,ap2) 351 struct action *ap1; 352 struct action *ap2; 353 { 354 int rc; 355 rc = ap1->sp->index - ap2->sp->index; 356 if( rc==0 ) rc = (int)ap1->type - (int)ap2->type; 357 if( rc==0 ){ 358 assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT); 359 assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT); 360 rc = ap1->x.rp->index - ap2->x.rp->index; 361 } 362 return rc; 363 } 364 365 /* Sort parser actions */ 366 struct action *Action_sort(ap) 367 struct action *ap; 368 { 369 ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp); 370 return ap; 371 } 372 373 void Action_add(app,type,sp,arg) 374 struct action **app; 375 enum e_action type; 376 struct symbol *sp; 377 char *arg; 378 { 379 struct action *new; 380 new = Action_new(); 381 new->next = *app; 382 *app = new; 383 new->type = type; 384 new->sp = sp; 385 if( type==SHIFT ){ 386 new->x.stp = (struct state *)arg; 387 }else{ 388 new->x.rp = (struct rule *)arg; 389 } 390 } 391 /********************** New code to implement the "acttab" module ***********/ 392 /* 393 ** This module implements routines use to construct the yy_action[] table. 394 */ 395 396 /* 397 ** The state of the yy_action table under construction is an instance of 398 ** the following structure 399 */ 400 typedef struct acttab acttab; 401 struct acttab { 402 int nAction; /* Number of used slots in aAction[] */ 403 int nActionAlloc; /* Slots allocated for aAction[] */ 404 struct { 405 int lookahead; /* Value of the lookahead token */ 406 int action; /* Action to take on the given lookahead */ 407 } *aAction, /* The yy_action[] table under construction */ 408 *aLookahead; /* A single new transaction set */ 409 int mnLookahead; /* Minimum aLookahead[].lookahead */ 410 int mnAction; /* Action associated with mnLookahead */ 411 int mxLookahead; /* Maximum aLookahead[].lookahead */ 412 int nLookahead; /* Used slots in aLookahead[] */ 413 int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ 414 }; 415 416 /* Return the number of entries in the yy_action table */ 417 #define acttab_size(X) ((X)->nAction) 418 419 /* The value for the N-th entry in yy_action */ 420 #define acttab_yyaction(X,N) ((X)->aAction[N].action) 421 422 /* The value for the N-th entry in yy_lookahead */ 423 #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) 424 425 /* Free all memory associated with the given acttab */ 426 void acttab_free(acttab *p){ 427 free( p->aAction ); 428 free( p->aLookahead ); 429 free( p ); 430 } 431 432 /* Allocate a new acttab structure */ 433 acttab *acttab_alloc(void){ 434 acttab *p = malloc( sizeof(*p) ); 435 if( p==0 ){ 436 fprintf(stderr,"Unable to allocate memory for a new acttab."); 437 exit(1); 438 } 439 memset(p, 0, sizeof(*p)); 440 return p; 441 } 442 443 /* Add a new action to the current transaction set 444 */ 445 void acttab_action(acttab *p, int lookahead, int action){ 446 if( p->nLookahead>=p->nLookaheadAlloc ){ 447 p->nLookaheadAlloc += 25; 448 p->aLookahead = realloc( p->aLookahead, 449 sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); 450 if( p->aLookahead==0 ){ 451 fprintf(stderr,"malloc failed\n"); 452 exit(1); 453 } 454 } 455 if( p->nLookahead==0 ){ 456 p->mxLookahead = lookahead; 457 p->mnLookahead = lookahead; 458 p->mnAction = action; 459 }else{ 460 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; 461 if( p->mnLookahead>lookahead ){ 462 p->mnLookahead = lookahead; 463 p->mnAction = action; 464 } 465 } 466 p->aLookahead[p->nLookahead].lookahead = lookahead; 467 p->aLookahead[p->nLookahead].action = action; 468 p->nLookahead++; 469 } 470 471 /* 472 ** Add the transaction set built up with prior calls to acttab_action() 473 ** into the current action table. Then reset the transaction set back 474 ** to an empty set in preparation for a new round of acttab_action() calls. 475 ** 476 ** Return the offset into the action table of the new transaction. 477 */ 478 int acttab_insert(acttab *p){ 479 int i, j, k, n; 480 assert( p->nLookahead>0 ); 481 482 /* Make sure we have enough space to hold the expanded action table 483 ** in the worst case. The worst case occurs if the transaction set 484 ** must be appended to the current action table 485 */ 486 n = p->mxLookahead + 1; 487 if( p->nAction + n >= p->nActionAlloc ){ 488 int oldAlloc = p->nActionAlloc; 489 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; 490 p->aAction = realloc( p->aAction, 491 sizeof(p->aAction[0])*p->nActionAlloc); 492 if( p->aAction==0 ){ 493 fprintf(stderr,"malloc failed\n"); 494 exit(1); 495 } 496 for(i=oldAlloc; i<p->nActionAlloc; i++){ 497 p->aAction[i].lookahead = -1; 498 p->aAction[i].action = -1; 499 } 500 } 501 502 /* Scan the existing action table looking for an offset where we can 503 ** insert the current transaction set. Fall out of the loop when that 504 ** offset is found. In the worst case, we fall out of the loop when 505 ** i reaches p->nAction, which means we append the new transaction set. 506 ** 507 ** i is the index in p->aAction[] where p->mnLookahead is inserted. 508 */ 509 for(i=0; i<p->nAction+p->mnLookahead; i++){ 510 if( p->aAction[i].lookahead<0 ){ 511 for(j=0; j<p->nLookahead; j++){ 512 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 513 if( k<0 ) break; 514 if( p->aAction[k].lookahead>=0 ) break; 515 } 516 if( j<p->nLookahead ) continue; 517 for(j=0; j<p->nAction; j++){ 518 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; 519 } 520 if( j==p->nAction ){ 521 break; /* Fits in empty slots */ 522 } 523 }else if( p->aAction[i].lookahead==p->mnLookahead ){ 524 if( p->aAction[i].action!=p->mnAction ) continue; 525 for(j=0; j<p->nLookahead; j++){ 526 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 527 if( k<0 || k>=p->nAction ) break; 528 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; 529 if( p->aLookahead[j].action!=p->aAction[k].action ) break; 530 } 531 if( j<p->nLookahead ) continue; 532 n = 0; 533 for(j=0; j<p->nAction; j++){ 534 if( p->aAction[j].lookahead<0 ) continue; 535 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; 536 } 537 if( n==p->nLookahead ){ 538 break; /* Same as a prior transaction set */ 539 } 540 } 541 } 542 /* Insert transaction set at index i. */ 543 for(j=0; j<p->nLookahead; j++){ 544 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 545 p->aAction[k] = p->aLookahead[j]; 546 if( k>=p->nAction ) p->nAction = k+1; 547 } 548 p->nLookahead = 0; 549 550 /* Return the offset that is added to the lookahead in order to get the 551 ** index into yy_action of the action */ 552 return i - p->mnLookahead; 553 } 554 555 /********************** From the file "assert.c" ****************************/ 556 /* 557 ** A more efficient way of handling assertions. 558 */ 559 void myassert(file,line) 560 char *file; 561 int line; 562 { 563 fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file); 564 exit(1); 565 } 566 /********************** From the file "build.c" *****************************/ 567 /* 568 ** Routines to construction the finite state machine for the LEMON 569 ** parser generator. 570 */ 571 572 /* Find a precedence symbol of every rule in the grammar. 573 ** 574 ** Those rules which have a precedence symbol coded in the input 575 ** grammar using the "[symbol]" construct will already have the 576 ** rp->precsym field filled. Other rules take as their precedence 577 ** symbol the first RHS symbol with a defined precedence. If there 578 ** are not RHS symbols with a defined precedence, the precedence 579 ** symbol field is left blank. 580 */ 581 void FindRulePrecedences(xp) 582 struct lemon *xp; 583 { 584 struct rule *rp; 585 for(rp=xp->rule; rp; rp=rp->next){ 586 if( rp->precsym==0 ){ 587 int i; 588 for(i=0; i<rp->nrhs; i++){ 589 if( rp->rhs[i]->prec>=0 ){ 590 rp->precsym = rp->rhs[i]; 591 break; 592 } 593 } 594 } 595 } 596 return; 597 } 598 599 /* Find all nonterminals which will generate the empty string. 600 ** Then go back and compute the first sets of every nonterminal. 601 ** The first set is the set of all terminal symbols which can begin 602 ** a string generated by that nonterminal. 603 */ 604 void FindFirstSets(lemp) 605 struct lemon *lemp; 606 { 607 int i; 608 struct rule *rp; 609 int progress; 610 611 for(i=0; i<lemp->nsymbol; i++){ 612 lemp->symbols[i]->lambda = B_FALSE; 613 } 614 for(i=lemp->nterminal; i<lemp->nsymbol; i++){ 615 lemp->symbols[i]->firstset = SetNew(); 616 } 617 618 /* First compute all lambdas */ 619 do{ 620 progress = 0; 621 for(rp=lemp->rule; rp; rp=rp->next){ 622 if( rp->lhs->lambda ) continue; 623 for(i=0; i<rp->nrhs; i++){ 624 if( rp->rhs[i]->lambda==B_FALSE ) break; 625 } 626 if( i==rp->nrhs ){ 627 rp->lhs->lambda = B_TRUE; 628 progress = 1; 629 } 630 } 631 }while( progress ); 632 633 /* Now compute all first sets */ 634 do{ 635 struct symbol *s1, *s2; 636 progress = 0; 637 for(rp=lemp->rule; rp; rp=rp->next){ 638 s1 = rp->lhs; 639 for(i=0; i<rp->nrhs; i++){ 640 s2 = rp->rhs[i]; 641 if( s2->type==TERMINAL ){ 642 progress += SetAdd(s1->firstset,s2->index); 643 break; 644 }else if( s1==s2 ){ 645 if( s1->lambda==B_FALSE ) break; 646 }else{ 647 progress += SetUnion(s1->firstset,s2->firstset); 648 if( s2->lambda==B_FALSE ) break; 649 } 650 } 651 } 652 }while( progress ); 653 return; 654 } 655 656 /* Compute all LR(0) states for the grammar. Links 657 ** are added to between some states so that the LR(1) follow sets 658 ** can be computed later. 659 */ 660 PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */ 661 void FindStates(lemp) 662 struct lemon *lemp; 663 { 664 struct symbol *sp; 665 struct rule *rp; 666 667 Configlist_init(); 668 669 /* Find the start symbol */ 670 if( lemp->start ){ 671 sp = Symbol_find(lemp->start); 672 if( sp==0 ){ 673 ErrorMsg(lemp->filename,0, 674 "The specified start symbol \"%s\" is not \ 675 in a nonterminal of the grammar. \"%s\" will be used as the start \ 676 symbol instead.",lemp->start,lemp->rule->lhs->name); 677 lemp->errorcnt++; 678 sp = lemp->rule->lhs; 679 } 680 }else{ 681 sp = lemp->rule->lhs; 682 } 683 684 /* Make sure the start symbol doesn't occur on the right-hand side of 685 ** any rule. Report an error if it does. (YACC would generate a new 686 ** start symbol in this case.) */ 687 for(rp=lemp->rule; rp; rp=rp->next){ 688 int i; 689 for(i=0; i<rp->nrhs; i++){ 690 if( rp->rhs[i]==sp ){ 691 ErrorMsg(lemp->filename,0, 692 "The start symbol \"%s\" occurs on the \ 693 right-hand side of a rule. This will result in a parser which \ 694 does not work properly.",sp->name); 695 lemp->errorcnt++; 696 } 697 } 698 } 699 700 /* The basis configuration set for the first state 701 ** is all rules which have the start symbol as their 702 ** left-hand side */ 703 for(rp=sp->rule; rp; rp=rp->nextlhs){ 704 struct config *newcfp; 705 newcfp = Configlist_addbasis(rp,0); 706 SetAdd(newcfp->fws,0); 707 } 708 709 /* Compute the first state. All other states will be 710 ** computed automatically during the computation of the first one. 711 ** The returned pointer to the first state is not used. */ 712 (void)getstate(lemp); 713 return; 714 } 715 716 /* Return a pointer to a state which is described by the configuration 717 ** list which has been built from calls to Configlist_add. 718 */ 719 PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */ 720 PRIVATE struct state *getstate(lemp) 721 struct lemon *lemp; 722 { 723 struct config *cfp, *bp; 724 struct state *stp; 725 726 /* Extract the sorted basis of the new state. The basis was constructed 727 ** by prior calls to "Configlist_addbasis()". */ 728 Configlist_sortbasis(); 729 bp = Configlist_basis(); 730 731 /* Get a state with the same basis */ 732 stp = State_find(bp); 733 if( stp ){ 734 /* A state with the same basis already exists! Copy all the follow-set 735 ** propagation links from the state under construction into the 736 ** preexisting state, then return a pointer to the preexisting state */ 737 struct config *x, *y; 738 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ 739 Plink_copy(&y->bplp,x->bplp); 740 Plink_delete(x->fplp); 741 x->fplp = x->bplp = 0; 742 } 743 cfp = Configlist_return(); 744 Configlist_eat(cfp); 745 }else{ 746 /* This really is a new state. Construct all the details */ 747 Configlist_closure(lemp); /* Compute the configuration closure */ 748 Configlist_sort(); /* Sort the configuration closure */ 749 cfp = Configlist_return(); /* Get a pointer to the config list */ 750 stp = State_new(); /* A new state structure */ 751 MemoryCheck(stp); 752 stp->bp = bp; /* Remember the configuration basis */ 753 stp->cfp = cfp; /* Remember the configuration closure */ 754 stp->index = lemp->nstate++; /* Every state gets a sequence number */ 755 stp->ap = 0; /* No actions, yet. */ 756 State_insert(stp,stp->bp); /* Add to the state table */ 757 buildshifts(lemp,stp); /* Recursively compute successor states */ 758 } 759 return stp; 760 } 761 762 /* Construct all successor states to the given state. A "successor" 763 ** state is any state which can be reached by a shift action. 764 */ 765 PRIVATE void buildshifts(lemp,stp) 766 struct lemon *lemp; 767 struct state *stp; /* The state from which successors are computed */ 768 { 769 struct config *cfp; /* For looping thru the config closure of "stp" */ 770 struct config *bcfp; /* For the inner loop on config closure of "stp" */ 771 struct config *new; /* */ 772 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ 773 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ 774 struct state *newstp; /* A pointer to a successor state */ 775 776 /* Each configuration becomes complete after it contibutes to a successor 777 ** state. Initially, all configurations are incomplete */ 778 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; 779 780 /* Loop through all configurations of the state "stp" */ 781 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 782 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ 783 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ 784 Configlist_reset(); /* Reset the new config set */ 785 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ 786 787 /* For every configuration in the state "stp" which has the symbol "sp" 788 ** following its dot, add the same configuration to the basis set under 789 ** construction but with the dot shifted one symbol to the right. */ 790 for(bcfp=cfp; bcfp; bcfp=bcfp->next){ 791 if( bcfp->status==COMPLETE ) continue; /* Already used */ 792 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ 793 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ 794 if( bsp!=sp ) continue; /* Must be same as for "cfp" */ 795 bcfp->status = COMPLETE; /* Mark this config as used */ 796 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1); 797 Plink_add(&new->bplp,bcfp); 798 } 799 800 /* Get a pointer to the state described by the basis configuration set 801 ** constructed in the preceding loop */ 802 newstp = getstate(lemp); 803 804 /* The state "newstp" is reached from the state "stp" by a shift action 805 ** on the symbol "sp" */ 806 Action_add(&stp->ap,SHIFT,sp,(char *)newstp); 807 } 808 } 809 810 /* 811 ** Construct the propagation links 812 */ 813 void FindLinks(lemp) 814 struct lemon *lemp; 815 { 816 int i; 817 struct config *cfp, *other; 818 struct state *stp; 819 struct plink *plp; 820 821 /* Housekeeping detail: 822 ** Add to every propagate link a pointer back to the state to 823 ** which the link is attached. */ 824 for(i=0; i<lemp->nstate; i++){ 825 stp = lemp->sorted[i]; 826 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 827 cfp->stp = stp; 828 } 829 } 830 831 /* Convert all backlinks into forward links. Only the forward 832 ** links are used in the follow-set computation. */ 833 for(i=0; i<lemp->nstate; i++){ 834 stp = lemp->sorted[i]; 835 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 836 for(plp=cfp->bplp; plp; plp=plp->next){ 837 other = plp->cfp; 838 Plink_add(&other->fplp,cfp); 839 } 840 } 841 } 842 } 843 844 /* Compute all followsets. 845 ** 846 ** A followset is the set of all symbols which can come immediately 847 ** after a configuration. 848 */ 849 void FindFollowSets(lemp) 850 struct lemon *lemp; 851 { 852 int i; 853 struct config *cfp; 854 struct plink *plp; 855 int progress; 856 int change; 857 858 for(i=0; i<lemp->nstate; i++){ 859 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 860 cfp->status = INCOMPLETE; 861 } 862 } 863 864 do{ 865 progress = 0; 866 for(i=0; i<lemp->nstate; i++){ 867 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 868 if( cfp->status==COMPLETE ) continue; 869 for(plp=cfp->fplp; plp; plp=plp->next){ 870 change = SetUnion(plp->cfp->fws,cfp->fws); 871 if( change ){ 872 plp->cfp->status = INCOMPLETE; 873 progress = 1; 874 } 875 } 876 cfp->status = COMPLETE; 877 } 878 } 879 }while( progress ); 880 } 881 882 static int resolve_conflict(); 883 884 /* Compute the reduce actions, and resolve conflicts. 885 */ 886 void FindActions(lemp) 887 struct lemon *lemp; 888 { 889 int i,j; 890 struct config *cfp; 891 struct state *stp; 892 struct symbol *sp; 893 struct rule *rp; 894 895 /* Add all of the reduce actions 896 ** A reduce action is added for each element of the followset of 897 ** a configuration which has its dot at the extreme right. 898 */ 899 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ 900 stp = lemp->sorted[i]; 901 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ 902 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ 903 for(j=0; j<lemp->nterminal; j++){ 904 if( SetFind(cfp->fws,j) ){ 905 /* Add a reduce action to the state "stp" which will reduce by the 906 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ 907 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); 908 } 909 } 910 } 911 } 912 } 913 914 /* Add the accepting token */ 915 if( lemp->start ){ 916 sp = Symbol_find(lemp->start); 917 if( sp==0 ) sp = lemp->rule->lhs; 918 }else{ 919 sp = lemp->rule->lhs; 920 } 921 /* Add to the first state (which is always the starting state of the 922 ** finite state machine) an action to ACCEPT if the lookahead is the 923 ** start nonterminal. */ 924 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); 925 926 /* Resolve conflicts */ 927 for(i=0; i<lemp->nstate; i++){ 928 struct action *ap, *nap; 929 struct state *stp; 930 stp = lemp->sorted[i]; 931 assert( stp->ap ); 932 stp->ap = Action_sort(stp->ap); 933 for(ap=stp->ap; ap && ap->next; ap=ap->next){ 934 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ 935 /* The two actions "ap" and "nap" have the same lookahead. 936 ** Figure out which one should be used */ 937 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); 938 } 939 } 940 } 941 942 /* Report an error for each rule that can never be reduced. */ 943 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = B_FALSE; 944 for(i=0; i<lemp->nstate; i++){ 945 struct action *ap; 946 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ 947 if( ap->type==REDUCE ) ap->x.rp->canReduce = B_TRUE; 948 } 949 } 950 for(rp=lemp->rule; rp; rp=rp->next){ 951 if( rp->canReduce ) continue; 952 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); 953 lemp->errorcnt++; 954 } 955 } 956 957 /* Resolve a conflict between the two given actions. If the 958 ** conflict can't be resolve, return non-zero. 959 ** 960 ** NO LONGER TRUE: 961 ** To resolve a conflict, first look to see if either action 962 ** is on an error rule. In that case, take the action which 963 ** is not associated with the error rule. If neither or both 964 ** actions are associated with an error rule, then try to 965 ** use precedence to resolve the conflict. 966 ** 967 ** If either action is a SHIFT, then it must be apx. This 968 ** function won't work if apx->type==REDUCE and apy->type==SHIFT. 969 */ 970 static int resolve_conflict(apx,apy,errsym) 971 struct action *apx; 972 struct action *apy; 973 struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */ 974 { 975 struct symbol *spx, *spy; 976 int errcnt = 0; 977 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ 978 if( apx->type==SHIFT && apy->type==REDUCE ){ 979 spx = apx->sp; 980 spy = apy->x.rp->precsym; 981 if( spy==0 || spx->prec<0 || spy->prec<0 ){ 982 /* Not enough precedence information. */ 983 apy->type = CONFLICT; 984 errcnt++; 985 }else if( spx->prec>spy->prec ){ /* Lower precedence wins */ 986 apy->type = RD_RESOLVED; 987 }else if( spx->prec<spy->prec ){ 988 apx->type = SH_RESOLVED; 989 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ 990 apy->type = RD_RESOLVED; /* associativity */ 991 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ 992 apx->type = SH_RESOLVED; 993 }else{ 994 assert( spx->prec==spy->prec && spx->assoc==NONE ); 995 apy->type = CONFLICT; 996 errcnt++; 997 } 998 }else if( apx->type==REDUCE && apy->type==REDUCE ){ 999 spx = apx->x.rp->precsym; 1000 spy = apy->x.rp->precsym; 1001 if( spx==0 || spy==0 || spx->prec<0 || 1002 spy->prec<0 || spx->prec==spy->prec ){ 1003 apy->type = CONFLICT; 1004 errcnt++; 1005 }else if( spx->prec>spy->prec ){ 1006 apy->type = RD_RESOLVED; 1007 }else if( spx->prec<spy->prec ){ 1008 apx->type = RD_RESOLVED; 1009 } 1010 }else{ 1011 assert( 1012 apx->type==SH_RESOLVED || 1013 apx->type==RD_RESOLVED || 1014 apx->type==CONFLICT || 1015 apy->type==SH_RESOLVED || 1016 apy->type==RD_RESOLVED || 1017 apy->type==CONFLICT 1018 ); 1019 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before 1020 ** REDUCEs on the list. If we reach this point it must be because 1021 ** the parser conflict had already been resolved. */ 1022 } 1023 return errcnt; 1024 } 1025 /********************* From the file "configlist.c" *************************/ 1026 /* 1027 ** Routines to processing a configuration list and building a state 1028 ** in the LEMON parser generator. 1029 */ 1030 1031 static struct config *freelist = 0; /* List of free configurations */ 1032 static struct config *current = 0; /* Top of list of configurations */ 1033 static struct config **currentend = 0; /* Last on list of configs */ 1034 static struct config *basis = 0; /* Top of list of basis configs */ 1035 static struct config **basisend = 0; /* End of list of basis configs */ 1036 1037 /* Return a pointer to a new configuration */ 1038 PRIVATE struct config *newconfig(){ 1039 struct config *new; 1040 if( freelist==0 ){ 1041 int i; 1042 int amt = 3; 1043 freelist = (struct config *)malloc( sizeof(struct config)*amt ); 1044 if( freelist==0 ){ 1045 fprintf(stderr,"Unable to allocate memory for a new configuration."); 1046 exit(1); 1047 } 1048 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 1049 freelist[amt-1].next = 0; 1050 } 1051 new = freelist; 1052 freelist = freelist->next; 1053 return new; 1054 } 1055 1056 /* The configuration "old" is no longer used */ 1057 PRIVATE void deleteconfig(old) 1058 struct config *old; 1059 { 1060 old->next = freelist; 1061 freelist = old; 1062 } 1063 1064 /* Initialized the configuration list builder */ 1065 void Configlist_init(){ 1066 current = 0; 1067 currentend = ¤t; 1068 basis = 0; 1069 basisend = &basis; 1070 Configtable_init(); 1071 return; 1072 } 1073 1074 /* Initialized the configuration list builder */ 1075 void Configlist_reset(){ 1076 current = 0; 1077 currentend = ¤t; 1078 basis = 0; 1079 basisend = &basis; 1080 Configtable_clear(0); 1081 return; 1082 } 1083 1084 /* Add another configuration to the configuration list */ 1085 struct config *Configlist_add(rp,dot) 1086 struct rule *rp; /* The rule */ 1087 int dot; /* Index into the RHS of the rule where the dot goes */ 1088 { 1089 struct config *cfp, model; 1090 1091 assert( currentend!=0 ); 1092 model.rp = rp; 1093 model.dot = dot; 1094 cfp = Configtable_find(&model); 1095 if( cfp==0 ){ 1096 cfp = newconfig(); 1097 cfp->rp = rp; 1098 cfp->dot = dot; 1099 cfp->fws = SetNew(); 1100 cfp->stp = 0; 1101 cfp->fplp = cfp->bplp = 0; 1102 cfp->next = 0; 1103 cfp->bp = 0; 1104 *currentend = cfp; 1105 currentend = &cfp->next; 1106 Configtable_insert(cfp); 1107 } 1108 return cfp; 1109 } 1110 1111 /* Add a basis configuration to the configuration list */ 1112 struct config *Configlist_addbasis(rp,dot) 1113 struct rule *rp; 1114 int dot; 1115 { 1116 struct config *cfp, model; 1117 1118 assert( basisend!=0 ); 1119 assert( currentend!=0 ); 1120 model.rp = rp; 1121 model.dot = dot; 1122 cfp = Configtable_find(&model); 1123 if( cfp==0 ){ 1124 cfp = newconfig(); 1125 cfp->rp = rp; 1126 cfp->dot = dot; 1127 cfp->fws = SetNew(); 1128 cfp->stp = 0; 1129 cfp->fplp = cfp->bplp = 0; 1130 cfp->next = 0; 1131 cfp->bp = 0; 1132 *currentend = cfp; 1133 currentend = &cfp->next; 1134 *basisend = cfp; 1135 basisend = &cfp->bp; 1136 Configtable_insert(cfp); 1137 } 1138 return cfp; 1139 } 1140 1141 /* Compute the closure of the configuration list */ 1142 void Configlist_closure(lemp) 1143 struct lemon *lemp; 1144 { 1145 struct config *cfp, *newcfp; 1146 struct rule *rp, *newrp; 1147 struct symbol *sp, *xsp; 1148 int i, dot; 1149 1150 assert( currentend!=0 ); 1151 for(cfp=current; cfp; cfp=cfp->next){ 1152 rp = cfp->rp; 1153 dot = cfp->dot; 1154 if( dot>=rp->nrhs ) continue; 1155 sp = rp->rhs[dot]; 1156 if( sp->type==NONTERMINAL ){ 1157 if( sp->rule==0 && sp!=lemp->errsym ){ 1158 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", 1159 sp->name); 1160 lemp->errorcnt++; 1161 } 1162 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ 1163 newcfp = Configlist_add(newrp,0); 1164 for(i=dot+1; i<rp->nrhs; i++){ 1165 xsp = rp->rhs[i]; 1166 if( xsp->type==TERMINAL ){ 1167 SetAdd(newcfp->fws,xsp->index); 1168 break; 1169 }else{ 1170 SetUnion(newcfp->fws,xsp->firstset); 1171 if( xsp->lambda==B_FALSE ) break; 1172 } 1173 } 1174 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); 1175 } 1176 } 1177 } 1178 return; 1179 } 1180 1181 /* Sort the configuration list */ 1182 void Configlist_sort(){ 1183 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); 1184 currentend = 0; 1185 return; 1186 } 1187 1188 /* Sort the basis configuration list */ 1189 void Configlist_sortbasis(){ 1190 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); 1191 basisend = 0; 1192 return; 1193 } 1194 1195 /* Return a pointer to the head of the configuration list and 1196 ** reset the list */ 1197 struct config *Configlist_return(){ 1198 struct config *old; 1199 old = current; 1200 current = 0; 1201 currentend = 0; 1202 return old; 1203 } 1204 1205 /* Return a pointer to the head of the configuration list and 1206 ** reset the list */ 1207 struct config *Configlist_basis(){ 1208 struct config *old; 1209 old = basis; 1210 basis = 0; 1211 basisend = 0; 1212 return old; 1213 } 1214 1215 /* Free all elements of the given configuration list */ 1216 void Configlist_eat(cfp) 1217 struct config *cfp; 1218 { 1219 struct config *nextcfp; 1220 for(; cfp; cfp=nextcfp){ 1221 nextcfp = cfp->next; 1222 assert( cfp->fplp==0 ); 1223 assert( cfp->bplp==0 ); 1224 if( cfp->fws ) SetFree(cfp->fws); 1225 deleteconfig(cfp); 1226 } 1227 return; 1228 } 1229 /***************** From the file "error.c" *********************************/ 1230 /* 1231 ** Code for printing error message. 1232 */ 1233 1234 /* Find a good place to break "msg" so that its length is at least "min" 1235 ** but no more than "max". Make the point as close to max as possible. 1236 */ 1237 static int findbreak(msg,min,max) 1238 char *msg; 1239 int min; 1240 int max; 1241 { 1242 int i,spot; 1243 char c; 1244 for(i=spot=min; i<=max; i++){ 1245 c = msg[i]; 1246 if( c=='\t' ) msg[i] = ' '; 1247 if( c=='\n' ){ msg[i] = ' '; spot = i; break; } 1248 if( c==0 ){ spot = i; break; } 1249 if( c=='-' && i<max-1 ) spot = i+1; 1250 if( c==' ' ) spot = i; 1251 } 1252 return spot; 1253 } 1254 1255 /* 1256 ** The error message is split across multiple lines if necessary. The 1257 ** splits occur at a space, if there is a space available near the end 1258 ** of the line. 1259 */ 1260 #define ERRMSGSIZE 10000 /* Hope this is big enough. No way to error check */ 1261 #define LINEWIDTH 79 /* Max width of any output line */ 1262 #define PREFIXLIMIT 30 /* Max width of the prefix on each line */ 1263 void ErrorMsg(const char *filename, int lineno, const char *format, ...){ 1264 char errmsg[ERRMSGSIZE]; 1265 char prefix[PREFIXLIMIT+10]; 1266 int errmsgsize; 1267 int prefixsize; 1268 int availablewidth; 1269 va_list ap; 1270 int end, restart, base; 1271 1272 va_start(ap, format); 1273 /* Prepare a prefix to be prepended to every output line */ 1274 if( lineno>0 ){ 1275 sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno); 1276 }else{ 1277 sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename); 1278 } 1279 prefixsize = strlen(prefix); 1280 availablewidth = LINEWIDTH - prefixsize; 1281 1282 /* Generate the error message */ 1283 vsprintf(errmsg,format,ap); 1284 va_end(ap); 1285 errmsgsize = strlen(errmsg); 1286 /* Remove trailing '\n's from the error message. */ 1287 while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){ 1288 errmsg[--errmsgsize] = 0; 1289 } 1290 1291 /* Print the error message */ 1292 base = 0; 1293 while( errmsg[base]!=0 ){ 1294 end = restart = findbreak(&errmsg[base],0,availablewidth); 1295 restart += base; 1296 while( errmsg[restart]==' ' ) restart++; 1297 fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]); 1298 base = restart; 1299 } 1300 } 1301 /**************** From the file "main.c" ************************************/ 1302 /* 1303 ** Main program file for the LEMON parser generator. 1304 */ 1305 1306 /* Report an out-of-memory condition and abort. This function 1307 ** is used mostly by the "MemoryCheck" macro in struct.h 1308 */ 1309 void memory_error(){ 1310 fprintf(stderr,"Out of memory. Aborting...\n"); 1311 exit(1); 1312 } 1313 1314 1315 /* The main program. Parse the command line and do it... */ 1316 int main(argc,argv) 1317 int argc; 1318 char **argv; 1319 { 1320 static int version = 0; 1321 static int rpflag = 0; 1322 static int basisflag = 0; 1323 static int compress = 0; 1324 static int quiet = 0; 1325 static int statistics = 0; 1326 static int mhflag = 0; 1327 static struct s_options options[] = { 1328 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, 1329 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, 1330 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, 1331 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"}, 1332 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, 1333 {OPT_FLAG, "s", (char*)&statistics, "Print parser stats to standard output."}, 1334 {OPT_FLAG, "x", (char*)&version, "Print the version number."}, 1335 {OPT_FLAG,0,0,0} 1336 }; 1337 int i; 1338 struct lemon lem; 1339 1340 OptInit(argv,options,stderr); 1341 if( version ){ 1342 printf("Lemon version 1.0\n"); 1343 exit(0); 1344 } 1345 if( OptNArgs()!=1 ){ 1346 fprintf(stderr,"Exactly one filename argument is required.\n"); 1347 exit(1); 1348 } 1349 lem.errorcnt = 0; 1350 1351 /* Initialize the machine */ 1352 Strsafe_init(); 1353 Symbol_init(); 1354 State_init(); 1355 lem.argv0 = argv[0]; 1356 lem.filename = OptArg(0); 1357 lem.basisflag = basisflag; 1358 lem.has_fallback = 0; 1359 lem.nconflict = 0; 1360 lem.name = lem.include = lem.arg = lem.tokentype = lem.start = 0; 1361 lem.vartype = 0; 1362 lem.stacksize = 0; 1363 lem.error = lem.overflow = lem.failure = lem.accept = lem.tokendest = 1364 lem.tokenprefix = lem.outname = lem.extracode = 0; 1365 lem.vardest = 0; 1366 lem.tablesize = 0; 1367 Symbol_new("$"); 1368 lem.errsym = Symbol_new("error"); 1369 1370 /* Parse the input file */ 1371 Parse(&lem); 1372 if( lem.errorcnt ) exit(lem.errorcnt); 1373 if( lem.rule==0 ){ 1374 fprintf(stderr,"Empty grammar.\n"); 1375 exit(1); 1376 } 1377 1378 /* Count and index the symbols of the grammar */ 1379 lem.nsymbol = Symbol_count(); 1380 Symbol_new("{default}"); 1381 lem.symbols = Symbol_arrayof(); 1382 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1383 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), 1384 (int(*)())Symbolcmpp); 1385 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1386 for(i=1; isupper(lem.symbols[i]->name[0]); i++); 1387 lem.nterminal = i; 1388 1389 /* Generate a reprint of the grammar, if requested on the command line */ 1390 if( rpflag ){ 1391 Reprint(&lem); 1392 }else{ 1393 /* Initialize the size for all follow and first sets */ 1394 SetSize(lem.nterminal); 1395 1396 /* Find the precedence for every production rule (that has one) */ 1397 FindRulePrecedences(&lem); 1398 1399 /* Compute the lambda-nonterminals and the first-sets for every 1400 ** nonterminal */ 1401 FindFirstSets(&lem); 1402 1403 /* Compute all LR(0) states. Also record follow-set propagation 1404 ** links so that the follow-set can be computed later */ 1405 lem.nstate = 0; 1406 FindStates(&lem); 1407 lem.sorted = State_arrayof(); 1408 1409 /* Tie up loose ends on the propagation links */ 1410 FindLinks(&lem); 1411 1412 /* Compute the follow set of every reducible configuration */ 1413 FindFollowSets(&lem); 1414 1415 /* Compute the action tables */ 1416 FindActions(&lem); 1417 1418 /* Compress the action tables */ 1419 if( compress==0 ) CompressTables(&lem); 1420 1421 /* Generate a report of the parser generated. (the "y.output" file) */ 1422 if( !quiet ) ReportOutput(&lem); 1423 1424 /* Generate the source code for the parser */ 1425 ReportTable(&lem, mhflag); 1426 1427 /* Produce a header file for use by the scanner. (This step is 1428 ** omitted if the "-m" option is used because makeheaders will 1429 ** generate the file for us.) */ 1430 if( !mhflag ) ReportHeader(&lem); 1431 } 1432 if( statistics ){ 1433 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", 1434 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); 1435 printf(" %d states, %d parser table entries, %d conflicts\n", 1436 lem.nstate, lem.tablesize, lem.nconflict); 1437 } 1438 if( lem.nconflict ){ 1439 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); 1440 } 1441 exit(lem.errorcnt + lem.nconflict); 1442 return (lem.errorcnt + lem.nconflict); 1443 } 1444 /******************** From the file "msort.c" *******************************/ 1445 /* 1446 ** A generic merge-sort program. 1447 ** 1448 ** USAGE: 1449 ** Let "ptr" be a pointer to some structure which is at the head of 1450 ** a null-terminated list. Then to sort the list call: 1451 ** 1452 ** ptr = msort(ptr,&(ptr->next),cmpfnc); 1453 ** 1454 ** In the above, "cmpfnc" is a pointer to a function which compares 1455 ** two instances of the structure and returns an integer, as in 1456 ** strcmp. The second argument is a pointer to the pointer to the 1457 ** second element of the linked list. This address is used to compute 1458 ** the offset to the "next" field within the structure. The offset to 1459 ** the "next" field must be constant for all structures in the list. 1460 ** 1461 ** The function returns a new pointer which is the head of the list 1462 ** after sorting. 1463 ** 1464 ** ALGORITHM: 1465 ** Merge-sort. 1466 */ 1467 1468 /* 1469 ** Return a pointer to the next structure in the linked list. 1470 */ 1471 #define NEXT(A) (*(char**)(((unsigned long)A)+offset)) 1472 1473 /* 1474 ** Inputs: 1475 ** a: A sorted, null-terminated linked list. (May be null). 1476 ** b: A sorted, null-terminated linked list. (May be null). 1477 ** cmp: A pointer to the comparison function. 1478 ** offset: Offset in the structure to the "next" field. 1479 ** 1480 ** Return Value: 1481 ** A pointer to the head of a sorted list containing the elements 1482 ** of both a and b. 1483 ** 1484 ** Side effects: 1485 ** The "next" pointers for elements in the lists a and b are 1486 ** changed. 1487 */ 1488 static char *merge(a,b,cmp,offset) 1489 char *a; 1490 char *b; 1491 int (*cmp)(); 1492 int offset; 1493 { 1494 char *ptr, *head; 1495 1496 if( a==0 ){ 1497 head = b; 1498 }else if( b==0 ){ 1499 head = a; 1500 }else{ 1501 if( (*cmp)(a,b)<0 ){ 1502 ptr = a; 1503 a = NEXT(a); 1504 }else{ 1505 ptr = b; 1506 b = NEXT(b); 1507 } 1508 head = ptr; 1509 while( a && b ){ 1510 if( (*cmp)(a,b)<0 ){ 1511 NEXT(ptr) = a; 1512 ptr = a; 1513 a = NEXT(a); 1514 }else{ 1515 NEXT(ptr) = b; 1516 ptr = b; 1517 b = NEXT(b); 1518 } 1519 } 1520 if( a ) NEXT(ptr) = a; 1521 else NEXT(ptr) = b; 1522 } 1523 return head; 1524 } 1525 1526 /* 1527 ** Inputs: 1528 ** list: Pointer to a singly-linked list of structures. 1529 ** next: Pointer to pointer to the second element of the list. 1530 ** cmp: A comparison function. 1531 ** 1532 ** Return Value: 1533 ** A pointer to the head of a sorted list containing the elements 1534 ** orginally in list. 1535 ** 1536 ** Side effects: 1537 ** The "next" pointers for elements in list are changed. 1538 */ 1539 #define LISTSIZE 30 1540 char *msort(list,next,cmp) 1541 char *list; 1542 char **next; 1543 int (*cmp)(); 1544 { 1545 unsigned long offset; 1546 char *ep; 1547 char *set[LISTSIZE]; 1548 int i; 1549 offset = (unsigned long)next - (unsigned long)list; 1550 for(i=0; i<LISTSIZE; i++) set[i] = 0; 1551 while( list ){ 1552 ep = list; 1553 list = NEXT(list); 1554 NEXT(ep) = 0; 1555 for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){ 1556 ep = merge(ep,set[i],cmp,offset); 1557 set[i] = 0; 1558 } 1559 set[i] = ep; 1560 } 1561 ep = 0; 1562 for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset); 1563 return ep; 1564 } 1565 /************************ From the file "option.c" **************************/ 1566 static char **argv; 1567 static struct s_options *op; 1568 static FILE *errstream; 1569 1570 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) 1571 1572 /* 1573 ** Print the command line with a carrot pointing to the k-th character 1574 ** of the n-th field. 1575 */ 1576 static void errline(n,k,err) 1577 int n; 1578 int k; 1579 FILE *err; 1580 { 1581 int spcnt, i; 1582 spcnt = 0; 1583 if( argv[0] ) fprintf(err,"%s",argv[0]); 1584 spcnt = strlen(argv[0]) + 1; 1585 for(i=1; i<n && argv[i]; i++){ 1586 fprintf(err," %s",argv[i]); 1587 spcnt += strlen(argv[i]+1); 1588 } 1589 spcnt += k; 1590 for(; argv[i]; i++) fprintf(err," %s",argv[i]); 1591 if( spcnt<20 ){ 1592 fprintf(err,"\n%*s^-- here\n",spcnt,""); 1593 }else{ 1594 fprintf(err,"\n%*shere --^\n",spcnt-7,""); 1595 } 1596 } 1597 1598 /* 1599 ** Return the index of the N-th non-switch argument. Return -1 1600 ** if N is out of range. 1601 */ 1602 static int argindex(n) 1603 int n; 1604 { 1605 int i; 1606 int dashdash = 0; 1607 if( argv!=0 && *argv!=0 ){ 1608 for(i=1; argv[i]; i++){ 1609 if( dashdash || !ISOPT(argv[i]) ){ 1610 if( n==0 ) return i; 1611 n--; 1612 } 1613 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1614 } 1615 } 1616 return -1; 1617 } 1618 1619 static char emsg[] = "Command line syntax error: "; 1620 1621 /* 1622 ** Process a flag command line argument. 1623 */ 1624 static int handleflags(i,err) 1625 int i; 1626 FILE *err; 1627 { 1628 int v; 1629 int errcnt = 0; 1630 int j; 1631 for(j=0; op[j].label; j++){ 1632 if( strcmp(&argv[i][1],op[j].label)==0 ) break; 1633 } 1634 v = argv[i][0]=='-' ? 1 : 0; 1635 if( op[j].label==0 ){ 1636 if( err ){ 1637 fprintf(err,"%sundefined option.\n",emsg); 1638 errline(i,1,err); 1639 } 1640 errcnt++; 1641 }else if( op[j].type==OPT_FLAG ){ 1642 *((int*)op[j].arg) = v; 1643 }else if( op[j].type==OPT_FFLAG ){ 1644 (*(void(*)())(op[j].arg))(v); 1645 }else{ 1646 if( err ){ 1647 fprintf(err,"%smissing argument on switch.\n",emsg); 1648 errline(i,1,err); 1649 } 1650 errcnt++; 1651 } 1652 return errcnt; 1653 } 1654 1655 /* 1656 ** Process a command line switch which has an argument. 1657 */ 1658 static int handleswitch(i,err) 1659 int i; 1660 FILE *err; 1661 { 1662 int lv = 0; 1663 double dv = 0.0; 1664 char *sv = 0, *end; 1665 char *cp; 1666 int j; 1667 int errcnt = 0; 1668 cp = strchr(argv[i],'='); 1669 *cp = 0; 1670 for(j=0; op[j].label; j++){ 1671 if( strcmp(argv[i],op[j].label)==0 ) break; 1672 } 1673 *cp = '='; 1674 if( op[j].label==0 ){ 1675 if( err ){ 1676 fprintf(err,"%sundefined option.\n",emsg); 1677 errline(i,0,err); 1678 } 1679 errcnt++; 1680 }else{ 1681 cp++; 1682 switch( op[j].type ){ 1683 case OPT_FLAG: 1684 case OPT_FFLAG: 1685 if( err ){ 1686 fprintf(err,"%soption requires an argument.\n",emsg); 1687 errline(i,0,err); 1688 } 1689 errcnt++; 1690 break; 1691 case OPT_DBL: 1692 case OPT_FDBL: 1693 dv = strtod(cp,&end); 1694 if( *end ){ 1695 if( err ){ 1696 fprintf(err,"%sillegal character in floating-point argument.\n",emsg); 1697 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1698 } 1699 errcnt++; 1700 } 1701 break; 1702 case OPT_INT: 1703 case OPT_FINT: 1704 lv = strtol(cp,&end,0); 1705 if( *end ){ 1706 if( err ){ 1707 fprintf(err,"%sillegal character in integer argument.\n",emsg); 1708 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1709 } 1710 errcnt++; 1711 } 1712 break; 1713 case OPT_STR: 1714 case OPT_FSTR: 1715 sv = cp; 1716 break; 1717 } 1718 switch( op[j].type ){ 1719 case OPT_FLAG: 1720 case OPT_FFLAG: 1721 break; 1722 case OPT_DBL: 1723 *(double*)(op[j].arg) = dv; 1724 break; 1725 case OPT_FDBL: 1726 (*(void(*)())(op[j].arg))(dv); 1727 break; 1728 case OPT_INT: 1729 *(int*)(op[j].arg) = lv; 1730 break; 1731 case OPT_FINT: 1732 (*(void(*)())(op[j].arg))((int)lv); 1733 break; 1734 case OPT_STR: 1735 *(char**)(op[j].arg) = sv; 1736 break; 1737 case OPT_FSTR: 1738 (*(void(*)())(op[j].arg))(sv); 1739 break; 1740 } 1741 } 1742 return errcnt; 1743 } 1744 1745 int OptInit(a,o,err) 1746 char **a; 1747 struct s_options *o; 1748 FILE *err; 1749 { 1750 int errcnt = 0; 1751 argv = a; 1752 op = o; 1753 errstream = err; 1754 if( argv && *argv && op ){ 1755 int i; 1756 for(i=1; argv[i]; i++){ 1757 if( argv[i][0]=='+' || argv[i][0]=='-' ){ 1758 errcnt += handleflags(i,err); 1759 }else if( strchr(argv[i],'=') ){ 1760 errcnt += handleswitch(i,err); 1761 } 1762 } 1763 } 1764 if( errcnt>0 ){ 1765 fprintf(err,"Valid command line options for \"%s\" are:\n",*a); 1766 OptPrint(); 1767 exit(1); 1768 } 1769 return 0; 1770 } 1771 1772 int OptNArgs(){ 1773 int cnt = 0; 1774 int dashdash = 0; 1775 int i; 1776 if( argv!=0 && argv[0]!=0 ){ 1777 for(i=1; argv[i]; i++){ 1778 if( dashdash || !ISOPT(argv[i]) ) cnt++; 1779 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1780 } 1781 } 1782 return cnt; 1783 } 1784 1785 char *OptArg(n) 1786 int n; 1787 { 1788 int i; 1789 i = argindex(n); 1790 return i>=0 ? argv[i] : 0; 1791 } 1792 1793 void OptErr(n) 1794 int n; 1795 { 1796 int i; 1797 i = argindex(n); 1798 if( i>=0 ) errline(i,0,errstream); 1799 } 1800 1801 void OptPrint(){ 1802 int i; 1803 int max, len; 1804 max = 0; 1805 for(i=0; op[i].label; i++){ 1806 len = strlen(op[i].label) + 1; 1807 switch( op[i].type ){ 1808 case OPT_FLAG: 1809 case OPT_FFLAG: 1810 break; 1811 case OPT_INT: 1812 case OPT_FINT: 1813 len += 9; /* length of "<integer>" */ 1814 break; 1815 case OPT_DBL: 1816 case OPT_FDBL: 1817 len += 6; /* length of "<real>" */ 1818 break; 1819 case OPT_STR: 1820 case OPT_FSTR: 1821 len += 8; /* length of "<string>" */ 1822 break; 1823 } 1824 if( len>max ) max = len; 1825 } 1826 for(i=0; op[i].label; i++){ 1827 switch( op[i].type ){ 1828 case OPT_FLAG: 1829 case OPT_FFLAG: 1830 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message); 1831 break; 1832 case OPT_INT: 1833 case OPT_FINT: 1834 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label, 1835 (int)(max-strlen(op[i].label)-9),"",op[i].message); 1836 break; 1837 case OPT_DBL: 1838 case OPT_FDBL: 1839 fprintf(errstream," %s=<real>%*s %s\n",op[i].label, 1840 (int)(max-strlen(op[i].label)-6),"",op[i].message); 1841 break; 1842 case OPT_STR: 1843 case OPT_FSTR: 1844 fprintf(errstream," %s=<string>%*s %s\n",op[i].label, 1845 (int)(max-strlen(op[i].label)-8),"",op[i].message); 1846 break; 1847 } 1848 } 1849 } 1850 /*********************** From the file "parse.c" ****************************/ 1851 /* 1852 ** Input file parser for the LEMON parser generator. 1853 */ 1854 1855 /* The state of the parser */ 1856 struct pstate { 1857 char *filename; /* Name of the input file */ 1858 int tokenlineno; /* Linenumber at which current token starts */ 1859 int errorcnt; /* Number of errors so far */ 1860 char *tokenstart; /* Text of current token */ 1861 struct lemon *gp; /* Global state vector */ 1862 enum e_state { 1863 INITIALIZE, 1864 WAITING_FOR_DECL_OR_RULE, 1865 WAITING_FOR_DECL_KEYWORD, 1866 WAITING_FOR_DECL_ARG, 1867 WAITING_FOR_PRECEDENCE_SYMBOL, 1868 WAITING_FOR_ARROW, 1869 IN_RHS, 1870 LHS_ALIAS_1, 1871 LHS_ALIAS_2, 1872 LHS_ALIAS_3, 1873 RHS_ALIAS_1, 1874 RHS_ALIAS_2, 1875 PRECEDENCE_MARK_1, 1876 PRECEDENCE_MARK_2, 1877 RESYNC_AFTER_RULE_ERROR, 1878 RESYNC_AFTER_DECL_ERROR, 1879 WAITING_FOR_DESTRUCTOR_SYMBOL, 1880 WAITING_FOR_DATATYPE_SYMBOL, 1881 WAITING_FOR_FALLBACK_ID 1882 } state; /* The state of the parser */ 1883 struct symbol *fallback; /* The fallback token */ 1884 struct symbol *lhs; /* Left-hand side of current rule */ 1885 char *lhsalias; /* Alias for the LHS */ 1886 int nrhs; /* Number of right-hand side symbols seen */ 1887 struct symbol *rhs[MAXRHS]; /* RHS symbols */ 1888 char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ 1889 struct rule *prevrule; /* Previous rule parsed */ 1890 char *declkeyword; /* Keyword of a declaration */ 1891 char **declargslot; /* Where the declaration argument should be put */ 1892 int *decllnslot; /* Where the declaration linenumber is put */ 1893 enum e_assoc declassoc; /* Assign this association to decl arguments */ 1894 int preccounter; /* Assign this precedence to decl arguments */ 1895 struct rule *firstrule; /* Pointer to first rule in the grammar */ 1896 struct rule *lastrule; /* Pointer to the most recently parsed rule */ 1897 }; 1898 1899 /* Parse a single token */ 1900 static void parseonetoken(psp) 1901 struct pstate *psp; 1902 { 1903 char *x; 1904 x = Strsafe(psp->tokenstart); /* Save the token permanently */ 1905 #if 0 1906 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, 1907 x,psp->state); 1908 #endif 1909 switch( psp->state ){ 1910 case INITIALIZE: 1911 psp->prevrule = 0; 1912 psp->preccounter = 0; 1913 psp->firstrule = psp->lastrule = 0; 1914 psp->gp->nrule = 0; 1915 /* FALLTHROUGH */ 1916 case WAITING_FOR_DECL_OR_RULE: 1917 if( x[0]=='%' ){ 1918 psp->state = WAITING_FOR_DECL_KEYWORD; 1919 }else if( islower(x[0]) ){ 1920 psp->lhs = Symbol_new(x); 1921 psp->nrhs = 0; 1922 psp->lhsalias = 0; 1923 psp->state = WAITING_FOR_ARROW; 1924 }else if( x[0]=='{' ){ 1925 if( psp->prevrule==0 ){ 1926 ErrorMsg(psp->filename,psp->tokenlineno, 1927 "There is not prior rule opon which to attach the code \ 1928 fragment which begins on this line."); 1929 psp->errorcnt++; 1930 }else if( psp->prevrule->code!=0 ){ 1931 ErrorMsg(psp->filename,psp->tokenlineno, 1932 "Code fragment beginning on this line is not the first \ 1933 to follow the previous rule."); 1934 psp->errorcnt++; 1935 }else{ 1936 psp->prevrule->line = psp->tokenlineno; 1937 psp->prevrule->code = &x[1]; 1938 } 1939 }else if( x[0]=='[' ){ 1940 psp->state = PRECEDENCE_MARK_1; 1941 }else{ 1942 ErrorMsg(psp->filename,psp->tokenlineno, 1943 "Token \"%s\" should be either \"%%\" or a nonterminal name.", 1944 x); 1945 psp->errorcnt++; 1946 } 1947 break; 1948 case PRECEDENCE_MARK_1: 1949 if( !isupper(x[0]) ){ 1950 ErrorMsg(psp->filename,psp->tokenlineno, 1951 "The precedence symbol must be a terminal."); 1952 psp->errorcnt++; 1953 }else if( psp->prevrule==0 ){ 1954 ErrorMsg(psp->filename,psp->tokenlineno, 1955 "There is no prior rule to assign precedence \"[%s]\".",x); 1956 psp->errorcnt++; 1957 }else if( psp->prevrule->precsym!=0 ){ 1958 ErrorMsg(psp->filename,psp->tokenlineno, 1959 "Precedence mark on this line is not the first \ 1960 to follow the previous rule."); 1961 psp->errorcnt++; 1962 }else{ 1963 psp->prevrule->precsym = Symbol_new(x); 1964 } 1965 psp->state = PRECEDENCE_MARK_2; 1966 break; 1967 case PRECEDENCE_MARK_2: 1968 if( x[0]!=']' ){ 1969 ErrorMsg(psp->filename,psp->tokenlineno, 1970 "Missing \"]\" on precedence mark."); 1971 psp->errorcnt++; 1972 } 1973 psp->state = WAITING_FOR_DECL_OR_RULE; 1974 break; 1975 case WAITING_FOR_ARROW: 1976 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 1977 psp->state = IN_RHS; 1978 }else if( x[0]=='(' ){ 1979 psp->state = LHS_ALIAS_1; 1980 }else{ 1981 ErrorMsg(psp->filename,psp->tokenlineno, 1982 "Expected to see a \":\" following the LHS symbol \"%s\".", 1983 psp->lhs->name); 1984 psp->errorcnt++; 1985 psp->state = RESYNC_AFTER_RULE_ERROR; 1986 } 1987 break; 1988 case LHS_ALIAS_1: 1989 if( isalpha(x[0]) ){ 1990 psp->lhsalias = x; 1991 psp->state = LHS_ALIAS_2; 1992 }else{ 1993 ErrorMsg(psp->filename,psp->tokenlineno, 1994 "\"%s\" is not a valid alias for the LHS \"%s\"\n", 1995 x,psp->lhs->name); 1996 psp->errorcnt++; 1997 psp->state = RESYNC_AFTER_RULE_ERROR; 1998 } 1999 break; 2000 case LHS_ALIAS_2: 2001 if( x[0]==')' ){ 2002 psp->state = LHS_ALIAS_3; 2003 }else{ 2004 ErrorMsg(psp->filename,psp->tokenlineno, 2005 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2006 psp->errorcnt++; 2007 psp->state = RESYNC_AFTER_RULE_ERROR; 2008 } 2009 break; 2010 case LHS_ALIAS_3: 2011 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2012 psp->state = IN_RHS; 2013 }else{ 2014 ErrorMsg(psp->filename,psp->tokenlineno, 2015 "Missing \"->\" following: \"%s(%s)\".", 2016 psp->lhs->name,psp->lhsalias); 2017 psp->errorcnt++; 2018 psp->state = RESYNC_AFTER_RULE_ERROR; 2019 } 2020 break; 2021 case IN_RHS: 2022 if( x[0]=='.' ){ 2023 struct rule *rp; 2024 rp = (struct rule *)malloc( sizeof(struct rule) + 2025 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs ); 2026 if( rp==0 ){ 2027 ErrorMsg(psp->filename,psp->tokenlineno, 2028 "Can't allocate enough memory for this rule."); 2029 psp->errorcnt++; 2030 psp->prevrule = 0; 2031 }else{ 2032 int i; 2033 rp->ruleline = psp->tokenlineno; 2034 rp->rhs = (struct symbol**)&rp[1]; 2035 rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]); 2036 for(i=0; i<psp->nrhs; i++){ 2037 rp->rhs[i] = psp->rhs[i]; 2038 rp->rhsalias[i] = psp->alias[i]; 2039 } 2040 rp->lhs = psp->lhs; 2041 rp->lhsalias = psp->lhsalias; 2042 rp->nrhs = psp->nrhs; 2043 rp->code = 0; 2044 rp->precsym = 0; 2045 rp->index = psp->gp->nrule++; 2046 rp->nextlhs = rp->lhs->rule; 2047 rp->lhs->rule = rp; 2048 rp->next = 0; 2049 if( psp->firstrule==0 ){ 2050 psp->firstrule = psp->lastrule = rp; 2051 }else{ 2052 psp->lastrule->next = rp; 2053 psp->lastrule = rp; 2054 } 2055 psp->prevrule = rp; 2056 } 2057 psp->state = WAITING_FOR_DECL_OR_RULE; 2058 }else if( isalpha(x[0]) ){ 2059 if( psp->nrhs>=MAXRHS ){ 2060 ErrorMsg(psp->filename,psp->tokenlineno, 2061 "Too many symbol on RHS or rule beginning at \"%s\".", 2062 x); 2063 psp->errorcnt++; 2064 psp->state = RESYNC_AFTER_RULE_ERROR; 2065 }else{ 2066 psp->rhs[psp->nrhs] = Symbol_new(x); 2067 psp->alias[psp->nrhs] = 0; 2068 psp->nrhs++; 2069 } 2070 }else if( x[0]=='(' && psp->nrhs>0 ){ 2071 psp->state = RHS_ALIAS_1; 2072 }else{ 2073 ErrorMsg(psp->filename,psp->tokenlineno, 2074 "Illegal character on RHS of rule: \"%s\".",x); 2075 psp->errorcnt++; 2076 psp->state = RESYNC_AFTER_RULE_ERROR; 2077 } 2078 break; 2079 case RHS_ALIAS_1: 2080 if( isalpha(x[0]) ){ 2081 psp->alias[psp->nrhs-1] = x; 2082 psp->state = RHS_ALIAS_2; 2083 }else{ 2084 ErrorMsg(psp->filename,psp->tokenlineno, 2085 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", 2086 x,psp->rhs[psp->nrhs-1]->name); 2087 psp->errorcnt++; 2088 psp->state = RESYNC_AFTER_RULE_ERROR; 2089 } 2090 break; 2091 case RHS_ALIAS_2: 2092 if( x[0]==')' ){ 2093 psp->state = IN_RHS; 2094 }else{ 2095 ErrorMsg(psp->filename,psp->tokenlineno, 2096 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2097 psp->errorcnt++; 2098 psp->state = RESYNC_AFTER_RULE_ERROR; 2099 } 2100 break; 2101 case WAITING_FOR_DECL_KEYWORD: 2102 if( isalpha(x[0]) ){ 2103 psp->declkeyword = x; 2104 psp->declargslot = 0; 2105 psp->decllnslot = 0; 2106 psp->state = WAITING_FOR_DECL_ARG; 2107 if( strcmp(x,"name")==0 ){ 2108 psp->declargslot = &(psp->gp->name); 2109 }else if( strcmp(x,"include")==0 ){ 2110 psp->declargslot = &(psp->gp->include); 2111 psp->decllnslot = &psp->gp->includeln; 2112 }else if( strcmp(x,"code")==0 ){ 2113 psp->declargslot = &(psp->gp->extracode); 2114 psp->decllnslot = &psp->gp->extracodeln; 2115 }else if( strcmp(x,"token_destructor")==0 ){ 2116 psp->declargslot = &psp->gp->tokendest; 2117 psp->decllnslot = &psp->gp->tokendestln; 2118 }else if( strcmp(x,"default_destructor")==0 ){ 2119 psp->declargslot = &psp->gp->vardest; 2120 psp->decllnslot = &psp->gp->vardestln; 2121 }else if( strcmp(x,"token_prefix")==0 ){ 2122 psp->declargslot = &psp->gp->tokenprefix; 2123 }else if( strcmp(x,"syntax_error")==0 ){ 2124 psp->declargslot = &(psp->gp->error); 2125 psp->decllnslot = &psp->gp->errorln; 2126 }else if( strcmp(x,"parse_accept")==0 ){ 2127 psp->declargslot = &(psp->gp->accept); 2128 psp->decllnslot = &psp->gp->acceptln; 2129 }else if( strcmp(x,"parse_failure")==0 ){ 2130 psp->declargslot = &(psp->gp->failure); 2131 psp->decllnslot = &psp->gp->failureln; 2132 }else if( strcmp(x,"stack_overflow")==0 ){ 2133 psp->declargslot = &(psp->gp->overflow); 2134 psp->decllnslot = &psp->gp->overflowln; 2135 }else if( strcmp(x,"extra_argument")==0 ){ 2136 psp->declargslot = &(psp->gp->arg); 2137 }else if( strcmp(x,"token_type")==0 ){ 2138 psp->declargslot = &(psp->gp->tokentype); 2139 }else if( strcmp(x,"default_type")==0 ){ 2140 psp->declargslot = &(psp->gp->vartype); 2141 }else if( strcmp(x,"stack_size")==0 ){ 2142 psp->declargslot = &(psp->gp->stacksize); 2143 }else if( strcmp(x,"start_symbol")==0 ){ 2144 psp->declargslot = &(psp->gp->start); 2145 }else if( strcmp(x,"left")==0 ){ 2146 psp->preccounter++; 2147 psp->declassoc = LEFT; 2148 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2149 }else if( strcmp(x,"right")==0 ){ 2150 psp->preccounter++; 2151 psp->declassoc = RIGHT; 2152 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2153 }else if( strcmp(x,"nonassoc")==0 ){ 2154 psp->preccounter++; 2155 psp->declassoc = NONE; 2156 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2157 }else if( strcmp(x,"destructor")==0 ){ 2158 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; 2159 }else if( strcmp(x,"type")==0 ){ 2160 psp->state = WAITING_FOR_DATATYPE_SYMBOL; 2161 }else if( strcmp(x,"fallback")==0 ){ 2162 psp->fallback = 0; 2163 psp->state = WAITING_FOR_FALLBACK_ID; 2164 }else{ 2165 ErrorMsg(psp->filename,psp->tokenlineno, 2166 "Unknown declaration keyword: \"%%%s\".",x); 2167 psp->errorcnt++; 2168 psp->state = RESYNC_AFTER_DECL_ERROR; 2169 } 2170 }else{ 2171 ErrorMsg(psp->filename,psp->tokenlineno, 2172 "Illegal declaration keyword: \"%s\".",x); 2173 psp->errorcnt++; 2174 psp->state = RESYNC_AFTER_DECL_ERROR; 2175 } 2176 break; 2177 case WAITING_FOR_DESTRUCTOR_SYMBOL: 2178 if( !isalpha(x[0]) ){ 2179 ErrorMsg(psp->filename,psp->tokenlineno, 2180 "Symbol name missing after %destructor keyword"); 2181 psp->errorcnt++; 2182 psp->state = RESYNC_AFTER_DECL_ERROR; 2183 }else{ 2184 struct symbol *sp = Symbol_new(x); 2185 psp->declargslot = &sp->destructor; 2186 psp->decllnslot = &sp->destructorln; 2187 psp->state = WAITING_FOR_DECL_ARG; 2188 } 2189 break; 2190 case WAITING_FOR_DATATYPE_SYMBOL: 2191 if( !isalpha(x[0]) ){ 2192 ErrorMsg(psp->filename,psp->tokenlineno, 2193 "Symbol name missing after %destructor keyword"); 2194 psp->errorcnt++; 2195 psp->state = RESYNC_AFTER_DECL_ERROR; 2196 }else{ 2197 struct symbol *sp = Symbol_new(x); 2198 psp->declargslot = &sp->datatype; 2199 psp->decllnslot = 0; 2200 psp->state = WAITING_FOR_DECL_ARG; 2201 } 2202 break; 2203 case WAITING_FOR_PRECEDENCE_SYMBOL: 2204 if( x[0]=='.' ){ 2205 psp->state = WAITING_FOR_DECL_OR_RULE; 2206 }else if( isupper(x[0]) ){ 2207 struct symbol *sp; 2208 sp = Symbol_new(x); 2209 if( sp->prec>=0 ){ 2210 ErrorMsg(psp->filename,psp->tokenlineno, 2211 "Symbol \"%s\" has already be given a precedence.",x); 2212 psp->errorcnt++; 2213 }else{ 2214 sp->prec = psp->preccounter; 2215 sp->assoc = psp->declassoc; 2216 } 2217 }else{ 2218 ErrorMsg(psp->filename,psp->tokenlineno, 2219 "Can't assign a precedence to \"%s\".",x); 2220 psp->errorcnt++; 2221 } 2222 break; 2223 case WAITING_FOR_DECL_ARG: 2224 if( (x[0]=='{' || x[0]=='\"' || isalnum(x[0])) ){ 2225 if( *(psp->declargslot)!=0 ){ 2226 ErrorMsg(psp->filename,psp->tokenlineno, 2227 "The argument \"%s\" to declaration \"%%%s\" is not the first.", 2228 x[0]=='\"' ? &x[1] : x,psp->declkeyword); 2229 psp->errorcnt++; 2230 psp->state = RESYNC_AFTER_DECL_ERROR; 2231 }else{ 2232 *(psp->declargslot) = (x[0]=='\"' || x[0]=='{') ? &x[1] : x; 2233 if( psp->decllnslot ) *psp->decllnslot = psp->tokenlineno; 2234 psp->state = WAITING_FOR_DECL_OR_RULE; 2235 } 2236 }else{ 2237 ErrorMsg(psp->filename,psp->tokenlineno, 2238 "Illegal argument to %%%s: %s",psp->declkeyword,x); 2239 psp->errorcnt++; 2240 psp->state = RESYNC_AFTER_DECL_ERROR; 2241 } 2242 break; 2243 case WAITING_FOR_FALLBACK_ID: 2244 if( x[0]=='.' ){ 2245 psp->state = WAITING_FOR_DECL_OR_RULE; 2246 }else if( !isupper(x[0]) ){ 2247 ErrorMsg(psp->filename, psp->tokenlineno, 2248 "%%fallback argument \"%s\" should be a token", x); 2249 psp->errorcnt++; 2250 }else{ 2251 struct symbol *sp = Symbol_new(x); 2252 if( psp->fallback==0 ){ 2253 psp->fallback = sp; 2254 }else if( sp->fallback ){ 2255 ErrorMsg(psp->filename, psp->tokenlineno, 2256 "More than one fallback assigned to token %s", x); 2257 psp->errorcnt++; 2258 }else{ 2259 sp->fallback = psp->fallback; 2260 psp->gp->has_fallback = 1; 2261 } 2262 } 2263 break; 2264 case RESYNC_AFTER_RULE_ERROR: 2265 /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2266 ** break; */ 2267 case RESYNC_AFTER_DECL_ERROR: 2268 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2269 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; 2270 break; 2271 } 2272 } 2273 2274 /* In spite of its name, this function is really a scanner. It read 2275 ** in the entire input file (all at once) then tokenizes it. Each 2276 ** token is passed to the function "parseonetoken" which builds all 2277 ** the appropriate data structures in the global state vector "gp". 2278 */ 2279 void Parse(gp) 2280 struct lemon *gp; 2281 { 2282 struct pstate ps; 2283 FILE *fp; 2284 char *filebuf; 2285 int filesize; 2286 int lineno; 2287 int c; 2288 char *cp, *nextcp; 2289 int startline = 0; 2290 2291 ps.gp = gp; 2292 ps.filename = gp->filename; 2293 ps.errorcnt = 0; 2294 ps.state = INITIALIZE; 2295 2296 /* Begin by reading the input file */ 2297 fp = fopen(ps.filename,"rb"); 2298 if( fp==0 ){ 2299 ErrorMsg(ps.filename,0,"Can't open this file for reading."); 2300 gp->errorcnt++; 2301 return; 2302 } 2303 fseek(fp,0,2); 2304 filesize = ftell(fp); 2305 rewind(fp); 2306 filebuf = (char *)malloc( filesize+1 ); 2307 if( filebuf==0 ){ 2308 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.", 2309 filesize+1); 2310 gp->errorcnt++; 2311 return; 2312 } 2313 if( fread(filebuf,1,filesize,fp)!=filesize ){ 2314 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", 2315 filesize); 2316 free(filebuf); 2317 gp->errorcnt++; 2318 return; 2319 } 2320 fclose(fp); 2321 filebuf[filesize] = 0; 2322 2323 /* Now scan the text of the input file */ 2324 lineno = 1; 2325 for(cp=filebuf; (c= *cp)!=0; ){ 2326 if( c=='\n' ) lineno++; /* Keep track of the line number */ 2327 if( isspace(c) ){ cp++; continue; } /* Skip all white space */ 2328 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ 2329 cp+=2; 2330 while( (c= *cp)!=0 && c!='\n' ) cp++; 2331 continue; 2332 } 2333 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ 2334 cp+=2; 2335 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ 2336 if( c=='\n' ) lineno++; 2337 cp++; 2338 } 2339 if( c ) cp++; 2340 continue; 2341 } 2342 ps.tokenstart = cp; /* Mark the beginning of the token */ 2343 ps.tokenlineno = lineno; /* Linenumber on which token begins */ 2344 if( c=='\"' ){ /* String literals */ 2345 cp++; 2346 while( (c= *cp)!=0 && c!='\"' ){ 2347 if( c=='\n' ) lineno++; 2348 cp++; 2349 } 2350 if( c==0 ){ 2351 ErrorMsg(ps.filename,startline, 2352 "String starting on this line is not terminated before the end of the file."); 2353 ps.errorcnt++; 2354 nextcp = cp; 2355 }else{ 2356 nextcp = cp+1; 2357 } 2358 }else if( c=='{' ){ /* A block of C code */ 2359 int level; 2360 cp++; 2361 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ 2362 if( c=='\n' ) lineno++; 2363 else if( c=='{' ) level++; 2364 else if( c=='}' ) level--; 2365 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ 2366 int prevc; 2367 cp = &cp[2]; 2368 prevc = 0; 2369 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ 2370 if( c=='\n' ) lineno++; 2371 prevc = c; 2372 cp++; 2373 } 2374 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ 2375 cp = &cp[2]; 2376 while( (c= *cp)!=0 && c!='\n' ) cp++; 2377 if( c ) lineno++; 2378 }else if( c=='\'' || c=='\"' ){ /* String a character literals */ 2379 int startchar, prevc; 2380 startchar = c; 2381 prevc = 0; 2382 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ 2383 if( c=='\n' ) lineno++; 2384 if( prevc=='\\' ) prevc = 0; 2385 else prevc = c; 2386 } 2387 } 2388 } 2389 if( c==0 ){ 2390 ErrorMsg(ps.filename,ps.tokenlineno, 2391 "C code starting on this line is not terminated before the end of the file."); 2392 ps.errorcnt++; 2393 nextcp = cp; 2394 }else{ 2395 nextcp = cp+1; 2396 } 2397 }else if( isalnum(c) ){ /* Identifiers */ 2398 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2399 nextcp = cp; 2400 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ 2401 cp += 3; 2402 nextcp = cp; 2403 }else{ /* All other (one character) operators */ 2404 cp++; 2405 nextcp = cp; 2406 } 2407 c = *cp; 2408 *cp = 0; /* Null terminate the token */ 2409 parseonetoken(&ps); /* Parse the token */ 2410 *cp = c; /* Restore the buffer */ 2411 cp = nextcp; 2412 } 2413 free(filebuf); /* Release the buffer after parsing */ 2414 gp->rule = ps.firstrule; 2415 gp->errorcnt = ps.errorcnt; 2416 } 2417 /*************************** From the file "plink.c" *********************/ 2418 /* 2419 ** Routines processing configuration follow-set propagation links 2420 ** in the LEMON parser generator. 2421 */ 2422 static struct plink *plink_freelist = 0; 2423 2424 /* Allocate a new plink */ 2425 struct plink *Plink_new(){ 2426 struct plink *new; 2427 2428 if( plink_freelist==0 ){ 2429 int i; 2430 int amt = 100; 2431 plink_freelist = (struct plink *)malloc( sizeof(struct plink)*amt ); 2432 if( plink_freelist==0 ){ 2433 fprintf(stderr, 2434 "Unable to allocate memory for a new follow-set propagation link.\n"); 2435 exit(1); 2436 } 2437 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; 2438 plink_freelist[amt-1].next = 0; 2439 } 2440 new = plink_freelist; 2441 plink_freelist = plink_freelist->next; 2442 return new; 2443 } 2444 2445 /* Add a plink to a plink list */ 2446 void Plink_add(plpp,cfp) 2447 struct plink **plpp; 2448 struct config *cfp; 2449 { 2450 struct plink *new; 2451 new = Plink_new(); 2452 new->next = *plpp; 2453 *plpp = new; 2454 new->cfp = cfp; 2455 } 2456 2457 /* Transfer every plink on the list "from" to the list "to" */ 2458 void Plink_copy(to,from) 2459 struct plink **to; 2460 struct plink *from; 2461 { 2462 struct plink *nextpl; 2463 while( from ){ 2464 nextpl = from->next; 2465 from->next = *to; 2466 *to = from; 2467 from = nextpl; 2468 } 2469 } 2470 2471 /* Delete every plink on the list */ 2472 void Plink_delete(plp) 2473 struct plink *plp; 2474 { 2475 struct plink *nextpl; 2476 2477 while( plp ){ 2478 nextpl = plp->next; 2479 plp->next = plink_freelist; 2480 plink_freelist = plp; 2481 plp = nextpl; 2482 } 2483 } 2484 /*********************** From the file "report.c" **************************/ 2485 /* 2486 ** Procedures for generating reports and tables in the LEMON parser generator. 2487 */ 2488 2489 /* Generate a filename with the given suffix. Space to hold the 2490 ** name comes from malloc() and must be freed by the calling 2491 ** function. 2492 */ 2493 PRIVATE char *file_makename(lemp,suffix) 2494 struct lemon *lemp; 2495 char *suffix; 2496 { 2497 char *name; 2498 char *cp; 2499 2500 name = malloc( strlen(lemp->filename) + strlen(suffix) + 5 ); 2501 if( name==0 ){ 2502 fprintf(stderr,"Can't allocate space for a filename.\n"); 2503 exit(1); 2504 } 2505 strcpy(name,lemp->filename); 2506 cp = strrchr(name,'.'); 2507 if( cp ) *cp = 0; 2508 strcat(name,suffix); 2509 return name; 2510 } 2511 2512 /* Open a file with a name based on the name of the input file, 2513 ** but with a different (specified) suffix, and return a pointer 2514 ** to the stream */ 2515 PRIVATE FILE *file_open(lemp,suffix,mode) 2516 struct lemon *lemp; 2517 char *suffix; 2518 char *mode; 2519 { 2520 FILE *fp; 2521 2522 if( lemp->outname ) free(lemp->outname); 2523 lemp->outname = file_makename(lemp, suffix); 2524 fp = fopen(lemp->outname,mode); 2525 if( fp==0 && *mode=='w' ){ 2526 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); 2527 lemp->errorcnt++; 2528 return 0; 2529 } 2530 return fp; 2531 } 2532 2533 /* Duplicate the input file without comments and without actions 2534 ** on rules */ 2535 void Reprint(lemp) 2536 struct lemon *lemp; 2537 { 2538 struct rule *rp; 2539 struct symbol *sp; 2540 int i, j, maxlen, len, ncolumns, skip; 2541 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); 2542 maxlen = 10; 2543 for(i=0; i<lemp->nsymbol; i++){ 2544 sp = lemp->symbols[i]; 2545 len = strlen(sp->name); 2546 if( len>maxlen ) maxlen = len; 2547 } 2548 ncolumns = 76/(maxlen+5); 2549 if( ncolumns<1 ) ncolumns = 1; 2550 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; 2551 for(i=0; i<skip; i++){ 2552 printf("//"); 2553 for(j=i; j<lemp->nsymbol; j+=skip){ 2554 sp = lemp->symbols[j]; 2555 assert( sp->index==j ); 2556 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); 2557 } 2558 printf("\n"); 2559 } 2560 for(rp=lemp->rule; rp; rp=rp->next){ 2561 printf("%s",rp->lhs->name); 2562 /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ 2563 printf(" ::="); 2564 for(i=0; i<rp->nrhs; i++){ 2565 printf(" %s",rp->rhs[i]->name); 2566 /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ 2567 } 2568 printf("."); 2569 if( rp->precsym ) printf(" [%s]",rp->precsym->name); 2570 /* if( rp->code ) printf("\n %s",rp->code); */ 2571 printf("\n"); 2572 } 2573 } 2574 2575 void ConfigPrint(fp,cfp) 2576 FILE *fp; 2577 struct config *cfp; 2578 { 2579 struct rule *rp; 2580 int i; 2581 rp = cfp->rp; 2582 fprintf(fp,"%s ::=",rp->lhs->name); 2583 for(i=0; i<=rp->nrhs; i++){ 2584 if( i==cfp->dot ) fprintf(fp," *"); 2585 if( i==rp->nrhs ) break; 2586 fprintf(fp," %s",rp->rhs[i]->name); 2587 } 2588 } 2589 2590 /* #define TEST */ 2591 #ifdef TEST 2592 /* Print a set */ 2593 PRIVATE void SetPrint(out,set,lemp) 2594 FILE *out; 2595 char *set; 2596 struct lemon *lemp; 2597 { 2598 int i; 2599 char *spacer; 2600 spacer = ""; 2601 fprintf(out,"%12s[",""); 2602 for(i=0; i<lemp->nterminal; i++){ 2603 if( SetFind(set,i) ){ 2604 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); 2605 spacer = " "; 2606 } 2607 } 2608 fprintf(out,"]\n"); 2609 } 2610 2611 /* Print a plink chain */ 2612 PRIVATE void PlinkPrint(out,plp,tag) 2613 FILE *out; 2614 struct plink *plp; 2615 char *tag; 2616 { 2617 while( plp ){ 2618 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->index); 2619 ConfigPrint(out,plp->cfp); 2620 fprintf(out,"\n"); 2621 plp = plp->next; 2622 } 2623 } 2624 #endif 2625 2626 /* Print an action to the given file descriptor. Return FALSE if 2627 ** nothing was actually printed. 2628 */ 2629 int PrintAction(struct action *ap, FILE *fp, int indent){ 2630 int result = 1; 2631 switch( ap->type ){ 2632 case SHIFT: 2633 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->index); 2634 break; 2635 case REDUCE: 2636 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); 2637 break; 2638 case ACCEPT: 2639 fprintf(fp,"%*s accept",indent,ap->sp->name); 2640 break; 2641 case ERROR: 2642 fprintf(fp,"%*s error",indent,ap->sp->name); 2643 break; 2644 case CONFLICT: 2645 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", 2646 indent,ap->sp->name,ap->x.rp->index); 2647 break; 2648 case SH_RESOLVED: 2649 case RD_RESOLVED: 2650 case NOT_USED: 2651 result = 0; 2652 break; 2653 } 2654 return result; 2655 } 2656 2657 /* Generate the "y.output" log file */ 2658 void ReportOutput(lemp) 2659 struct lemon *lemp; 2660 { 2661 int i; 2662 struct state *stp; 2663 struct config *cfp; 2664 struct action *ap; 2665 FILE *fp; 2666 2667 fp = file_open(lemp,".out","w"); 2668 if( fp==0 ) return; 2669 fprintf(fp," \b"); 2670 for(i=0; i<lemp->nstate; i++){ 2671 stp = lemp->sorted[i]; 2672 fprintf(fp,"State %d:\n",stp->index); 2673 if( lemp->basisflag ) cfp=stp->bp; 2674 else cfp=stp->cfp; 2675 while( cfp ){ 2676 char buf[20]; 2677 if( cfp->dot==cfp->rp->nrhs ){ 2678 sprintf(buf,"(%d)",cfp->rp->index); 2679 fprintf(fp," %5s ",buf); 2680 }else{ 2681 fprintf(fp," "); 2682 } 2683 ConfigPrint(fp,cfp); 2684 fprintf(fp,"\n"); 2685 #ifdef TEST 2686 SetPrint(fp,cfp->fws,lemp); 2687 PlinkPrint(fp,cfp->fplp,"To "); 2688 PlinkPrint(fp,cfp->bplp,"From"); 2689 #endif 2690 if( lemp->basisflag ) cfp=cfp->bp; 2691 else cfp=cfp->next; 2692 } 2693 fprintf(fp,"\n"); 2694 for(ap=stp->ap; ap; ap=ap->next){ 2695 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); 2696 } 2697 fprintf(fp,"\n"); 2698 } 2699 fclose(fp); 2700 return; 2701 } 2702 2703 /* Search for the file "name" which is in the same directory as 2704 ** the exacutable */ 2705 PRIVATE char *pathsearch(argv0,name,modemask) 2706 char *argv0; 2707 char *name; 2708 int modemask; 2709 { 2710 char *pathlist; 2711 char *path,*cp; 2712 char c; 2713 extern int access(); 2714 2715 #ifdef __WIN32__ 2716 cp = strrchr(argv0,'\\'); 2717 #else 2718 cp = strrchr(argv0,'/'); 2719 #endif 2720 if( cp ){ 2721 c = *cp; 2722 *cp = 0; 2723 path = (char *)malloc( strlen(argv0) + strlen(name) + 2 ); 2724 if( path ) sprintf(path,"%s/%s",argv0,name); 2725 *cp = c; 2726 }else{ 2727 extern char *getenv(); 2728 pathlist = getenv("PATH"); 2729 if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; 2730 path = (char *)malloc( strlen(pathlist)+strlen(name)+2 ); 2731 if( path!=0 ){ 2732 while( *pathlist ){ 2733 cp = strchr(pathlist,':'); 2734 if( cp==0 ) cp = &pathlist[strlen(pathlist)]; 2735 c = *cp; 2736 *cp = 0; 2737 sprintf(path,"%s/%s",pathlist,name); 2738 *cp = c; 2739 if( c==0 ) pathlist = ""; 2740 else pathlist = &cp[1]; 2741 if( access(path,modemask)==0 ) break; 2742 } 2743 } 2744 } 2745 return path; 2746 } 2747 2748 /* Given an action, compute the integer value for that action 2749 ** which is to be put in the action table of the generated machine. 2750 ** Return negative if no action should be generated. 2751 */ 2752 PRIVATE int compute_action(lemp,ap) 2753 struct lemon *lemp; 2754 struct action *ap; 2755 { 2756 int act; 2757 switch( ap->type ){ 2758 case SHIFT: act = ap->x.stp->index; break; 2759 case REDUCE: act = ap->x.rp->index + lemp->nstate; break; 2760 case ERROR: act = lemp->nstate + lemp->nrule; break; 2761 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; 2762 default: act = -1; break; 2763 } 2764 return act; 2765 } 2766 2767 #define LINESIZE 1000 2768 /* The next cluster of routines are for reading the template file 2769 ** and writing the results to the generated parser */ 2770 /* The first function transfers data from "in" to "out" until 2771 ** a line is seen which begins with "%%". The line number is 2772 ** tracked. 2773 ** 2774 ** if name!=0, then any word that begin with "Parse" is changed to 2775 ** begin with *name instead. 2776 */ 2777 PRIVATE void tplt_xfer(name,in,out,lineno) 2778 char *name; 2779 FILE *in; 2780 FILE *out; 2781 int *lineno; 2782 { 2783 int i, iStart; 2784 char line[LINESIZE]; 2785 while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ 2786 (*lineno)++; 2787 iStart = 0; 2788 if( name ){ 2789 for(i=0; line[i]; i++){ 2790 if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 2791 && (i==0 || !isalpha(line[i-1])) 2792 ){ 2793 if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); 2794 fprintf(out,"%s",name); 2795 i += 4; 2796 iStart = i+1; 2797 } 2798 } 2799 } 2800 fprintf(out,"%s",&line[iStart]); 2801 } 2802 } 2803 2804 /* The next function finds the template file and opens it, returning 2805 ** a pointer to the opened file. */ 2806 PRIVATE FILE *tplt_open(lemp) 2807 struct lemon *lemp; 2808 { 2809 static char templatename[] = "lempar.c"; 2810 char buf[1000]; 2811 FILE *in; 2812 char *tpltname; 2813 char *cp; 2814 2815 cp = strrchr(lemp->filename,'.'); 2816 if( cp ){ 2817 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); 2818 }else{ 2819 sprintf(buf,"%s.lt",lemp->filename); 2820 } 2821 if( access(buf,004)==0 ){ 2822 tpltname = buf; 2823 }else if( access(templatename,004)==0 ){ 2824 tpltname = templatename; 2825 }else{ 2826 tpltname = pathsearch(lemp->argv0,templatename,0); 2827 } 2828 if( tpltname==0 ){ 2829 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 2830 templatename); 2831 lemp->errorcnt++; 2832 return 0; 2833 } 2834 in = fopen(tpltname,"r"); 2835 if( in==0 ){ 2836 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); 2837 lemp->errorcnt++; 2838 return 0; 2839 } 2840 return in; 2841 } 2842 2843 /* Print a string to the file and keep the linenumber up to date */ 2844 PRIVATE void tplt_print(out,lemp,str,strln,lineno) 2845 FILE *out; 2846 struct lemon *lemp; 2847 char *str; 2848 int strln; 2849 int *lineno; 2850 { 2851 if( str==0 ) return; 2852 fprintf(out,"#line %d \"%s\"\n",strln,lemp->filename); (*lineno)++; 2853 while( *str ){ 2854 if( *str=='\n' ) (*lineno)++; 2855 putc(*str,out); 2856 str++; 2857 } 2858 fprintf(out,"\n#line %d \"%s\"\n",*lineno+2,lemp->outname); (*lineno)+=2; 2859 return; 2860 } 2861 2862 /* 2863 ** The following routine emits code for the destructor for the 2864 ** symbol sp 2865 */ 2866 void emit_destructor_code(out,sp,lemp,lineno) 2867 FILE *out; 2868 struct symbol *sp; 2869 struct lemon *lemp; 2870 int *lineno; 2871 { 2872 char *cp = 0; 2873 2874 int linecnt = 0; 2875 if( sp->type==TERMINAL ){ 2876 cp = lemp->tokendest; 2877 if( cp==0 ) return; 2878 fprintf(out,"#line %d \"%s\"\n{",lemp->tokendestln,lemp->filename); 2879 }else if( sp->destructor ){ 2880 cp = sp->destructor; 2881 fprintf(out,"#line %d \"%s\"\n{",sp->destructorln,lemp->filename); 2882 }else if( lemp->vardest ){ 2883 cp = lemp->vardest; 2884 if( cp==0 ) return; 2885 fprintf(out,"#line %d \"%s\"\n{",lemp->vardestln,lemp->filename); 2886 }else{ 2887 assert( 0 ); /* Cannot happen */ 2888 } 2889 for(; *cp; cp++){ 2890 if( *cp=='$' && cp[1]=='$' ){ 2891 fprintf(out,"(yypminor->yy%d)",sp->dtnum); 2892 cp++; 2893 continue; 2894 } 2895 if( *cp=='\n' ) linecnt++; 2896 fputc(*cp,out); 2897 } 2898 (*lineno) += 3 + linecnt; 2899 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname); 2900 return; 2901 } 2902 2903 /* 2904 ** Return TRUE (non-zero) if the given symbol has a destructor. 2905 */ 2906 int has_destructor(sp, lemp) 2907 struct symbol *sp; 2908 struct lemon *lemp; 2909 { 2910 int ret; 2911 if( sp->type==TERMINAL ){ 2912 ret = lemp->tokendest!=0; 2913 }else{ 2914 ret = lemp->vardest!=0 || sp->destructor!=0; 2915 } 2916 return ret; 2917 } 2918 2919 /* 2920 ** Generate code which executes when the rule "rp" is reduced. Write 2921 ** the code to "out". Make sure lineno stays up-to-date. 2922 */ 2923 PRIVATE void emit_code(out,rp,lemp,lineno) 2924 FILE *out; 2925 struct rule *rp; 2926 struct lemon *lemp; 2927 int *lineno; 2928 { 2929 char *cp, *xp; 2930 int linecnt = 0; 2931 int i; 2932 char lhsused = 0; /* True if the LHS element has been used */ 2933 char used[MAXRHS]; /* True for each RHS element which is used */ 2934 2935 for(i=0; i<rp->nrhs; i++) used[i] = 0; 2936 lhsused = 0; 2937 2938 /* Generate code to do the reduce action */ 2939 if( rp->code ){ 2940 fprintf(out,"#line %d \"%s\"\n{",rp->line,lemp->filename); 2941 for(cp=rp->code; *cp; cp++){ 2942 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ 2943 char saved; 2944 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); 2945 saved = *xp; 2946 *xp = 0; 2947 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ 2948 fprintf(out,"yygotominor.yy%d",rp->lhs->dtnum); 2949 cp = xp; 2950 lhsused = 1; 2951 }else{ 2952 for(i=0; i<rp->nrhs; i++){ 2953 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ 2954 fprintf(out,"yymsp[%d].minor.yy%d",i-rp->nrhs+1,rp->rhs[i]->dtnum); 2955 cp = xp; 2956 used[i] = 1; 2957 break; 2958 } 2959 } 2960 } 2961 *xp = saved; 2962 } 2963 if( *cp=='\n' ) linecnt++; 2964 fputc(*cp,out); 2965 } /* End loop */ 2966 (*lineno) += 3 + linecnt; 2967 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname); 2968 } /* End if( rp->code ) */ 2969 2970 /* Check to make sure the LHS has been used */ 2971 if( rp->lhsalias && !lhsused ){ 2972 ErrorMsg(lemp->filename,rp->ruleline, 2973 "Label \"%s\" for \"%s(%s)\" is never used.", 2974 rp->lhsalias,rp->lhs->name,rp->lhsalias); 2975 lemp->errorcnt++; 2976 } 2977 2978 /* Generate destructor code for RHS symbols which are not used in the 2979 ** reduce code */ 2980 for(i=0; i<rp->nrhs; i++){ 2981 if( rp->rhsalias[i] && !used[i] ){ 2982 ErrorMsg(lemp->filename,rp->ruleline, 2983 "Label %s for \"%s(%s)\" is never used.", 2984 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); 2985 lemp->errorcnt++; 2986 }else if( rp->rhsalias[i]==0 ){ 2987 if( has_destructor(rp->rhs[i],lemp) ){ 2988 fprintf(out," yy_destructor(%d,&yymsp[%d].minor);\n", 2989 rp->rhs[i]->index,i-rp->nrhs+1); (*lineno)++; 2990 }else{ 2991 fprintf(out," /* No destructor defined for %s */\n", 2992 rp->rhs[i]->name); 2993 (*lineno)++; 2994 } 2995 } 2996 } 2997 return; 2998 } 2999 3000 /* 3001 ** Print the definition of the union used for the parser's data stack. 3002 ** This union contains fields for every possible data type for tokens 3003 ** and nonterminals. In the process of computing and printing this 3004 ** union, also set the ".dtnum" field of every terminal and nonterminal 3005 ** symbol. 3006 */ 3007 void print_stack_union(out,lemp,plineno,mhflag) 3008 FILE *out; /* The output stream */ 3009 struct lemon *lemp; /* The main info structure for this parser */ 3010 int *plineno; /* Pointer to the line number */ 3011 int mhflag; /* True if generating makeheaders output */ 3012 { 3013 int lineno = *plineno; /* The line number of the output */ 3014 char **types; /* A hash table of datatypes */ 3015 int arraysize; /* Size of the "types" array */ 3016 int maxdtlength; /* Maximum length of any ".datatype" field. */ 3017 char *stddt; /* Standardized name for a datatype */ 3018 int i,j; /* Loop counters */ 3019 int hash; /* For hashing the name of a type */ 3020 char *name; /* Name of the parser */ 3021 3022 /* Allocate and initialize types[] and allocate stddt[] */ 3023 arraysize = lemp->nsymbol * 2; 3024 types = (char**)malloc( arraysize * sizeof(char*) ); 3025 for(i=0; i<arraysize; i++) types[i] = 0; 3026 maxdtlength = 0; 3027 if( lemp->vartype ){ 3028 maxdtlength = strlen(lemp->vartype); 3029 } 3030 for(i=0; i<lemp->nsymbol; i++){ 3031 int len; 3032 struct symbol *sp = lemp->symbols[i]; 3033 if( sp->datatype==0 ) continue; 3034 len = strlen(sp->datatype); 3035 if( len>maxdtlength ) maxdtlength = len; 3036 } 3037 stddt = (char*)malloc( maxdtlength*2 + 1 ); 3038 if( types==0 || stddt==0 ){ 3039 fprintf(stderr,"Out of memory.\n"); 3040 exit(1); 3041 } 3042 3043 /* Build a hash table of datatypes. The ".dtnum" field of each symbol 3044 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is 3045 ** used for terminal symbols. If there is no %default_type defined then 3046 ** 0 is also used as the .dtnum value for nonterminals which do not specify 3047 ** a datatype using the %type directive. 3048 */ 3049 for(i=0; i<lemp->nsymbol; i++){ 3050 struct symbol *sp = lemp->symbols[i]; 3051 char *cp; 3052 if( sp==lemp->errsym ){ 3053 sp->dtnum = arraysize+1; 3054 continue; 3055 } 3056 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ 3057 sp->dtnum = 0; 3058 continue; 3059 } 3060 cp = sp->datatype; 3061 if( cp==0 ) cp = lemp->vartype; 3062 j = 0; 3063 while( isspace(*cp) ) cp++; 3064 while( *cp ) stddt[j++] = *cp++; 3065 while( j>0 && isspace(stddt[j-1]) ) j--; 3066 stddt[j] = 0; 3067 hash = 0; 3068 for(j=0; stddt[j]; j++){ 3069 hash = hash*53 + stddt[j]; 3070 } 3071 hash = (hash & 0x7fffffff)%arraysize; 3072 while( types[hash] ){ 3073 if( strcmp(types[hash],stddt)==0 ){ 3074 sp->dtnum = hash + 1; 3075 break; 3076 } 3077 hash++; 3078 if( hash>=arraysize ) hash = 0; 3079 } 3080 if( types[hash]==0 ){ 3081 sp->dtnum = hash + 1; 3082 types[hash] = (char*)malloc( strlen(stddt)+1 ); 3083 if( types[hash]==0 ){ 3084 fprintf(stderr,"Out of memory.\n"); 3085 exit(1); 3086 } 3087 strcpy(types[hash],stddt); 3088 } 3089 } 3090 3091 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ 3092 name = lemp->name ? lemp->name : "Parse"; 3093 lineno = *plineno; 3094 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } 3095 fprintf(out,"#define %sTOKENTYPE %s\n",name, 3096 lemp->tokentype?lemp->tokentype:"void*"); lineno++; 3097 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } 3098 fprintf(out,"typedef union {\n"); lineno++; 3099 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; 3100 for(i=0; i<arraysize; i++){ 3101 if( types[i]==0 ) continue; 3102 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; 3103 free(types[i]); 3104 } 3105 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; 3106 free(stddt); 3107 free(types); 3108 fprintf(out,"} YYMINORTYPE;\n"); lineno++; 3109 *plineno = lineno; 3110 } 3111 3112 /* 3113 ** Return the name of a C datatype able to represent values between 3114 ** lwr and upr, inclusive. 3115 */ 3116 static const char *minimum_size_type(int lwr, int upr){ 3117 if( lwr>=0 ){ 3118 if( upr<=255 ){ 3119 return "unsigned char"; 3120 }else if( upr<65535 ){ 3121 return "unsigned short int"; 3122 }else{ 3123 return "unsigned int"; 3124 } 3125 }else if( lwr>=-127 && upr<=127 ){ 3126 return "signed char"; 3127 }else if( lwr>=-32767 && upr<32767 ){ 3128 return "short"; 3129 }else{ 3130 return "int"; 3131 } 3132 } 3133 3134 /* 3135 ** Each state contains a set of token transaction and a set of 3136 ** nonterminal transactions. Each of these sets makes an instance 3137 ** of the following structure. An array of these structures is used 3138 ** to order the creation of entries in the yy_action[] table. 3139 */ 3140 struct axset { 3141 struct state *stp; /* A pointer to a state */ 3142 int isTkn; /* True to use tokens. False for non-terminals */ 3143 int nAction; /* Number of actions */ 3144 }; 3145 3146 /* 3147 ** Compare to axset structures for sorting purposes 3148 */ 3149 static int axset_compare(const void *a, const void *b){ 3150 struct axset *p1 = (struct axset*)a; 3151 struct axset *p2 = (struct axset*)b; 3152 return p2->nAction - p1->nAction; 3153 } 3154 3155 /* Generate C source code for the parser */ 3156 void ReportTable(lemp, mhflag) 3157 struct lemon *lemp; 3158 int mhflag; /* Output in makeheaders format if true */ 3159 { 3160 FILE *out, *in; 3161 char line[LINESIZE]; 3162 int lineno; 3163 struct state *stp; 3164 struct action *ap; 3165 struct rule *rp; 3166 struct acttab *pActtab; 3167 int i, j, n; 3168 char *name; 3169 int mnTknOfst, mxTknOfst; 3170 int mnNtOfst, mxNtOfst; 3171 struct axset *ax; 3172 3173 in = tplt_open(lemp); 3174 if( in==0 ) return; 3175 out = file_open(lemp,".c","w"); 3176 if( out==0 ){ 3177 fclose(in); 3178 return; 3179 } 3180 lineno = 1; 3181 tplt_xfer(lemp->name,in,out,&lineno); 3182 3183 /* Generate the include code, if any */ 3184 tplt_print(out,lemp,lemp->include,lemp->includeln,&lineno); 3185 if( mhflag ){ 3186 char *name = file_makename(lemp, ".h"); 3187 fprintf(out,"#include \"%s\"\n", name); lineno++; 3188 free(name); 3189 } 3190 tplt_xfer(lemp->name,in,out,&lineno); 3191 3192 /* Generate #defines for all tokens */ 3193 if( mhflag ){ 3194 char *prefix; 3195 fprintf(out,"#if INTERFACE\n"); lineno++; 3196 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 3197 else prefix = ""; 3198 for(i=1; i<lemp->nterminal; i++){ 3199 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 3200 lineno++; 3201 } 3202 fprintf(out,"#endif\n"); lineno++; 3203 } 3204 tplt_xfer(lemp->name,in,out,&lineno); 3205 3206 /* Generate the defines */ 3207 fprintf(out,"/* \001 */\n"); 3208 fprintf(out,"#define YYCODETYPE %s\n", 3209 minimum_size_type(0, lemp->nsymbol+5)); lineno++; 3210 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; 3211 fprintf(out,"#define YYACTIONTYPE %s\n", 3212 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; 3213 print_stack_union(out,lemp,&lineno,mhflag); 3214 if( lemp->stacksize ){ 3215 if( atoi(lemp->stacksize)<=0 ){ 3216 ErrorMsg(lemp->filename,0, 3217 "Illegal stack size: [%s]. The stack size should be an integer constant.", 3218 lemp->stacksize); 3219 lemp->errorcnt++; 3220 lemp->stacksize = "100"; 3221 } 3222 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; 3223 }else{ 3224 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; 3225 } 3226 if( mhflag ){ 3227 fprintf(out,"#if INTERFACE\n"); lineno++; 3228 } 3229 name = lemp->name ? lemp->name : "Parse"; 3230 if( lemp->arg && lemp->arg[0] ){ 3231 int i; 3232 i = strlen(lemp->arg); 3233 while( i>=1 && isspace(lemp->arg[i-1]) ) i--; 3234 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; 3235 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; 3236 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; 3237 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", 3238 name,lemp->arg,&lemp->arg[i]); lineno++; 3239 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", 3240 name,&lemp->arg[i],&lemp->arg[i]); lineno++; 3241 }else{ 3242 fprintf(out,"#define %sARG_SDECL\n",name); lineno++; 3243 fprintf(out,"#define %sARG_PDECL\n",name); lineno++; 3244 fprintf(out,"#define %sARG_FETCH\n",name); lineno++; 3245 fprintf(out,"#define %sARG_STORE\n",name); lineno++; 3246 } 3247 if( mhflag ){ 3248 fprintf(out,"#endif\n"); lineno++; 3249 } 3250 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; 3251 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; 3252 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; 3253 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; 3254 if( lemp->has_fallback ){ 3255 fprintf(out,"#define YYFALLBACK 1\n"); lineno++; 3256 } 3257 tplt_xfer(lemp->name,in,out,&lineno); 3258 3259 /* Generate the action table and its associates: 3260 ** 3261 ** yy_action[] A single table containing all actions. 3262 ** yy_lookahead[] A table containing the lookahead for each entry in 3263 ** yy_action. Used to detect hash collisions. 3264 ** yy_shift_ofst[] For each state, the offset into yy_action for 3265 ** shifting terminals. 3266 ** yy_reduce_ofst[] For each state, the offset into yy_action for 3267 ** shifting non-terminals after a reduce. 3268 ** yy_default[] Default action for each state. 3269 */ 3270 3271 /* Compute the actions on all states and count them up */ 3272 ax = malloc( sizeof(ax[0])*lemp->nstate*2 ); 3273 if( ax==0 ){ 3274 fprintf(stderr,"malloc failed\n"); 3275 exit(1); 3276 } 3277 for(i=0; i<lemp->nstate; i++){ 3278 stp = lemp->sorted[i]; 3279 stp->nTknAct = stp->nNtAct = 0; 3280 stp->iDflt = lemp->nstate + lemp->nrule; 3281 stp->iTknOfst = NO_OFFSET; 3282 stp->iNtOfst = NO_OFFSET; 3283 for(ap=stp->ap; ap; ap=ap->next){ 3284 if( compute_action(lemp,ap)>=0 ){ 3285 if( ap->sp->index<lemp->nterminal ){ 3286 stp->nTknAct++; 3287 }else if( ap->sp->index<lemp->nsymbol ){ 3288 stp->nNtAct++; 3289 }else{ 3290 stp->iDflt = compute_action(lemp, ap); 3291 } 3292 } 3293 } 3294 ax[i*2].stp = stp; 3295 ax[i*2].isTkn = 1; 3296 ax[i*2].nAction = stp->nTknAct; 3297 ax[i*2+1].stp = stp; 3298 ax[i*2+1].isTkn = 0; 3299 ax[i*2+1].nAction = stp->nNtAct; 3300 } 3301 mxTknOfst = mnTknOfst = 0; 3302 mxNtOfst = mnNtOfst = 0; 3303 3304 /* Compute the action table. In order to try to keep the size of the 3305 ** action table to a minimum, the heuristic of placing the largest action 3306 ** sets first is used. 3307 */ 3308 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); 3309 pActtab = acttab_alloc(); 3310 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){ 3311 stp = ax[i].stp; 3312 if( ax[i].isTkn ){ 3313 for(ap=stp->ap; ap; ap=ap->next){ 3314 int action; 3315 if( ap->sp->index>=lemp->nterminal ) continue; 3316 action = compute_action(lemp, ap); 3317 if( action<0 ) continue; 3318 acttab_action(pActtab, ap->sp->index, action); 3319 } 3320 stp->iTknOfst = acttab_insert(pActtab); 3321 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; 3322 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; 3323 }else{ 3324 for(ap=stp->ap; ap; ap=ap->next){ 3325 int action; 3326 if( ap->sp->index<lemp->nterminal ) continue; 3327 if( ap->sp->index==lemp->nsymbol ) continue; 3328 action = compute_action(lemp, ap); 3329 if( action<0 ) continue; 3330 acttab_action(pActtab, ap->sp->index, action); 3331 } 3332 stp->iNtOfst = acttab_insert(pActtab); 3333 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; 3334 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; 3335 } 3336 } 3337 free(ax); 3338 3339 /* Output the yy_action table */ 3340 fprintf(out,"static YYACTIONTYPE yy_action[] = {\n"); lineno++; 3341 n = acttab_size(pActtab); 3342 for(i=j=0; i<n; i++){ 3343 int action = acttab_yyaction(pActtab, i); 3344 if( action<0 ) action = lemp->nsymbol + lemp->nrule + 2; 3345 if( j==0 ) fprintf(out," /* %5d */ ", i); 3346 fprintf(out, " %4d,", action); 3347 if( j==9 || i==n-1 ){ 3348 fprintf(out, "\n"); lineno++; 3349 j = 0; 3350 }else{ 3351 j++; 3352 } 3353 } 3354 fprintf(out, "};\n"); lineno++; 3355 3356 /* Output the yy_lookahead table */ 3357 fprintf(out,"static YYCODETYPE yy_lookahead[] = {\n"); lineno++; 3358 for(i=j=0; i<n; i++){ 3359 int la = acttab_yylookahead(pActtab, i); 3360 if( la<0 ) la = lemp->nsymbol; 3361 if( j==0 ) fprintf(out," /* %5d */ ", i); 3362 fprintf(out, " %4d,", la); 3363 if( j==9 || i==n-1 ){ 3364 fprintf(out, "\n"); lineno++; 3365 j = 0; 3366 }else{ 3367 j++; 3368 } 3369 } 3370 fprintf(out, "};\n"); lineno++; 3371 3372 /* Output the yy_shift_ofst[] table */ 3373 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; 3374 fprintf(out, "static %s yy_shift_ofst[] = {\n", 3375 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; 3376 n = lemp->nstate; 3377 for(i=j=0; i<n; i++){ 3378 int ofst; 3379 stp = lemp->sorted[i]; 3380 ofst = stp->iTknOfst; 3381 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; 3382 if( j==0 ) fprintf(out," /* %5d */ ", i); 3383 fprintf(out, " %4d,", ofst); 3384 if( j==9 || i==n-1 ){ 3385 fprintf(out, "\n"); lineno++; 3386 j = 0; 3387 }else{ 3388 j++; 3389 } 3390 } 3391 fprintf(out, "};\n"); lineno++; 3392 3393 /* Output the yy_reduce_ofst[] table */ 3394 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; 3395 fprintf(out, "static %s yy_reduce_ofst[] = {\n", 3396 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; 3397 n = lemp->nstate; 3398 for(i=j=0; i<n; i++){ 3399 int ofst; 3400 stp = lemp->sorted[i]; 3401 ofst = stp->iNtOfst; 3402 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; 3403 if( j==0 ) fprintf(out," /* %5d */ ", i); 3404 fprintf(out, " %4d,", ofst); 3405 if( j==9 || i==n-1 ){ 3406 fprintf(out, "\n"); lineno++; 3407 j = 0; 3408 }else{ 3409 j++; 3410 } 3411 } 3412 fprintf(out, "};\n"); lineno++; 3413 3414 /* Output the default action table */ 3415 fprintf(out, "static YYACTIONTYPE yy_default[] = {\n"); lineno++; 3416 n = lemp->nstate; 3417 for(i=j=0; i<n; i++){ 3418 stp = lemp->sorted[i]; 3419 if( j==0 ) fprintf(out," /* %5d */ ", i); 3420 fprintf(out, " %4d,", stp->iDflt); 3421 if( j==9 || i==n-1 ){ 3422 fprintf(out, "\n"); lineno++; 3423 j = 0; 3424 }else{ 3425 j++; 3426 } 3427 } 3428 fprintf(out, "};\n"); lineno++; 3429 tplt_xfer(lemp->name,in,out,&lineno); 3430 3431 /* Generate the table of fallback tokens. 3432 */ 3433 if( lemp->has_fallback ){ 3434 for(i=0; i<lemp->nterminal; i++){ 3435 struct symbol *p = lemp->symbols[i]; 3436 if( p->fallback==0 ){ 3437 fprintf(out, " 0, /* %10s => nothing */\n", p->name); 3438 }else{ 3439 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, 3440 p->name, p->fallback->name); 3441 } 3442 lineno++; 3443 } 3444 } 3445 tplt_xfer(lemp->name, in, out, &lineno); 3446 3447 /* Generate a table containing the symbolic name of every symbol 3448 */ 3449 for(i=0; i<lemp->nsymbol; i++){ 3450 sprintf(line,"\"%s\",",lemp->symbols[i]->name); 3451 fprintf(out," %-15s",line); 3452 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } 3453 } 3454 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } 3455 tplt_xfer(lemp->name,in,out,&lineno); 3456 3457 /* Generate a table containing a text string that describes every 3458 ** rule in the rule set of the grammer. This information is used 3459 ** when tracing REDUCE actions. 3460 */ 3461 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ 3462 assert( rp->index==i ); 3463 fprintf(out," /* %3d */ \"%s ::=", i, rp->lhs->name); 3464 for(j=0; j<rp->nrhs; j++) fprintf(out," %s",rp->rhs[j]->name); 3465 fprintf(out,"\",\n"); lineno++; 3466 } 3467 tplt_xfer(lemp->name,in,out,&lineno); 3468 3469 /* Generate code which executes every time a symbol is popped from 3470 ** the stack while processing errors or while destroying the parser. 3471 ** (In other words, generate the %destructor actions) 3472 */ 3473 if( lemp->tokendest ){ 3474 for(i=0; i<lemp->nsymbol; i++){ 3475 struct symbol *sp = lemp->symbols[i]; 3476 if( sp==0 || sp->type!=TERMINAL ) continue; 3477 fprintf(out," case %d:\n",sp->index); lineno++; 3478 } 3479 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); 3480 if( i<lemp->nsymbol ){ 3481 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3482 fprintf(out," break;\n"); lineno++; 3483 } 3484 } 3485 for(i=0; i<lemp->nsymbol; i++){ 3486 struct symbol *sp = lemp->symbols[i]; 3487 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; 3488 fprintf(out," case %d:\n",sp->index); lineno++; 3489 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3490 fprintf(out," break;\n"); lineno++; 3491 } 3492 if( lemp->vardest ){ 3493 struct symbol *dflt_sp = 0; 3494 for(i=0; i<lemp->nsymbol; i++){ 3495 struct symbol *sp = lemp->symbols[i]; 3496 if( sp==0 || sp->type==TERMINAL || 3497 sp->index<=0 || sp->destructor!=0 ) continue; 3498 fprintf(out," case %d:\n",sp->index); lineno++; 3499 dflt_sp = sp; 3500 } 3501 if( dflt_sp!=0 ){ 3502 emit_destructor_code(out,dflt_sp,lemp,&lineno); 3503 fprintf(out," break;\n"); lineno++; 3504 } 3505 } 3506 tplt_xfer(lemp->name,in,out,&lineno); 3507 3508 /* Generate code which executes whenever the parser stack overflows */ 3509 tplt_print(out,lemp,lemp->overflow,lemp->overflowln,&lineno); 3510 tplt_xfer(lemp->name,in,out,&lineno); 3511 3512 /* Generate the table of rule information 3513 ** 3514 ** Note: This code depends on the fact that rules are number 3515 ** sequentually beginning with 0. 3516 */ 3517 for(rp=lemp->rule; rp; rp=rp->next){ 3518 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; 3519 } 3520 tplt_xfer(lemp->name,in,out,&lineno); 3521 3522 /* Generate code which execution during each REDUCE action */ 3523 for(rp=lemp->rule; rp; rp=rp->next){ 3524 fprintf(out," case %d:\n",rp->index); lineno++; 3525 emit_code(out,rp,lemp,&lineno); 3526 fprintf(out," break;\n"); lineno++; 3527 } 3528 tplt_xfer(lemp->name,in,out,&lineno); 3529 3530 /* Generate code which executes if a parse fails */ 3531 tplt_print(out,lemp,lemp->failure,lemp->failureln,&lineno); 3532 tplt_xfer(lemp->name,in,out,&lineno); 3533 3534 /* Generate code which executes when a syntax error occurs */ 3535 tplt_print(out,lemp,lemp->error,lemp->errorln,&lineno); 3536 tplt_xfer(lemp->name,in,out,&lineno); 3537 3538 /* Generate code which executes when the parser accepts its input */ 3539 tplt_print(out,lemp,lemp->accept,lemp->acceptln,&lineno); 3540 tplt_xfer(lemp->name,in,out,&lineno); 3541 3542 /* Append any addition code the user desires */ 3543 tplt_print(out,lemp,lemp->extracode,lemp->extracodeln,&lineno); 3544 3545 fclose(in); 3546 fclose(out); 3547 return; 3548 } 3549 3550 /* Generate a header file for the parser */ 3551 void ReportHeader(lemp) 3552 struct lemon *lemp; 3553 { 3554 FILE *out, *in; 3555 char *prefix; 3556 char line[LINESIZE]; 3557 char pattern[LINESIZE]; 3558 int i; 3559 3560 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 3561 else prefix = ""; 3562 in = file_open(lemp,".h","r"); 3563 if( in ){ 3564 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ 3565 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 3566 if( strcmp(line,pattern) ) break; 3567 } 3568 fclose(in); 3569 if( i==lemp->nterminal ){ 3570 /* No change in the file. Don't rewrite it. */ 3571 return; 3572 } 3573 } 3574 out = file_open(lemp,".h","w"); 3575 if( out ){ 3576 for(i=1; i<lemp->nterminal; i++){ 3577 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 3578 } 3579 fclose(out); 3580 } 3581 return; 3582 } 3583 3584 /* Reduce the size of the action tables, if possible, by making use 3585 ** of defaults. 3586 ** 3587 ** In this version, we take the most frequent REDUCE action and make 3588 ** it the default. Only default a reduce if there are more than one. 3589 */ 3590 void CompressTables(lemp) 3591 struct lemon *lemp; 3592 { 3593 struct state *stp; 3594 struct action *ap, *ap2; 3595 struct rule *rp, *rp2, *rbest; 3596 int nbest, n; 3597 int i; 3598 3599 for(i=0; i<lemp->nstate; i++){ 3600 stp = lemp->sorted[i]; 3601 nbest = 0; 3602 rbest = 0; 3603 3604 for(ap=stp->ap; ap; ap=ap->next){ 3605 if( ap->type!=REDUCE ) continue; 3606 rp = ap->x.rp; 3607 if( rp==rbest ) continue; 3608 n = 1; 3609 for(ap2=ap->next; ap2; ap2=ap2->next){ 3610 if( ap2->type!=REDUCE ) continue; 3611 rp2 = ap2->x.rp; 3612 if( rp2==rbest ) continue; 3613 if( rp2==rp ) n++; 3614 } 3615 if( n>nbest ){ 3616 nbest = n; 3617 rbest = rp; 3618 } 3619 } 3620 3621 /* Do not make a default if the number of rules to default 3622 ** is not at least 2 */ 3623 if( nbest<2 ) continue; 3624 3625 3626 /* Combine matching REDUCE actions into a single default */ 3627 for(ap=stp->ap; ap; ap=ap->next){ 3628 if( ap->type==REDUCE && ap->x.rp==rbest ) break; 3629 } 3630 assert( ap ); 3631 ap->sp = Symbol_new("{default}"); 3632 for(ap=ap->next; ap; ap=ap->next){ 3633 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; 3634 } 3635 stp->ap = Action_sort(stp->ap); 3636 } 3637 } 3638 3639 /***************** From the file "set.c" ************************************/ 3640 /* 3641 ** Set manipulation routines for the LEMON parser generator. 3642 */ 3643 3644 static int size = 0; 3645 3646 /* Set the set size */ 3647 void SetSize(n) 3648 int n; 3649 { 3650 size = n+1; 3651 } 3652 3653 /* Allocate a new set */ 3654 char *SetNew(){ 3655 char *s; 3656 int i; 3657 s = (char*)malloc( size ); 3658 if( s==0 ){ 3659 extern void memory_error(); 3660 memory_error(); 3661 } 3662 for(i=0; i<size; i++) s[i] = 0; 3663 return s; 3664 } 3665 3666 /* Deallocate a set */ 3667 void SetFree(s) 3668 char *s; 3669 { 3670 free(s); 3671 } 3672 3673 /* Add a new element to the set. Return TRUE if the element was added 3674 ** and FALSE if it was already there. */ 3675 int SetAdd(s,e) 3676 char *s; 3677 int e; 3678 { 3679 int rv; 3680 rv = s[e]; 3681 s[e] = 1; 3682 return !rv; 3683 } 3684 3685 /* Add every element of s2 to s1. Return TRUE if s1 changes. */ 3686 int SetUnion(s1,s2) 3687 char *s1; 3688 char *s2; 3689 { 3690 int i, progress; 3691 progress = 0; 3692 for(i=0; i<size; i++){ 3693 if( s2[i]==0 ) continue; 3694 if( s1[i]==0 ){ 3695 progress = 1; 3696 s1[i] = 1; 3697 } 3698 } 3699 return progress; 3700 } 3701 /********************** From the file "table.c" ****************************/ 3702 /* 3703 ** All code in this file has been automatically generated 3704 ** from a specification in the file 3705 ** "table.q" 3706 ** by the associative array code building program "aagen". 3707 ** Do not edit this file! Instead, edit the specification 3708 ** file, then rerun aagen. 3709 */ 3710 /* 3711 ** Code for processing tables in the LEMON parser generator. 3712 */ 3713 3714 PRIVATE int strhash(x) 3715 char *x; 3716 { 3717 int h = 0; 3718 while( *x) h = h*13 + *(x++); 3719 return h; 3720 } 3721 3722 /* Works like strdup, sort of. Save a string in malloced memory, but 3723 ** keep strings in a table so that the same string is not in more 3724 ** than one place. 3725 */ 3726 char *Strsafe(y) 3727 char *y; 3728 { 3729 char *z; 3730 3731 z = Strsafe_find(y); 3732 if( z==0 && (z=malloc( strlen(y)+1 ))!=0 ){ 3733 strcpy(z,y); 3734 Strsafe_insert(z); 3735 } 3736 MemoryCheck(z); 3737 return z; 3738 } 3739 3740 /* There is one instance of the following structure for each 3741 ** associative array of type "x1". 3742 */ 3743 struct s_x1 { 3744 int size; /* The number of available slots. */ 3745 /* Must be a power of 2 greater than or */ 3746 /* equal to 1 */ 3747 int count; /* Number of currently slots filled */ 3748 struct s_x1node *tbl; /* The data stored here */ 3749 struct s_x1node **ht; /* Hash table for lookups */ 3750 }; 3751 3752 /* There is one instance of this structure for every data element 3753 ** in an associative array of type "x1". 3754 */ 3755 typedef struct s_x1node { 3756 char *data; /* The data */ 3757 struct s_x1node *next; /* Next entry with the same hash */ 3758 struct s_x1node **from; /* Previous link */ 3759 } x1node; 3760 3761 /* There is only one instance of the array, which is the following */ 3762 static struct s_x1 *x1a; 3763 3764 /* Allocate a new associative array */ 3765 void Strsafe_init(){ 3766 if( x1a ) return; 3767 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); 3768 if( x1a ){ 3769 x1a->size = 1024; 3770 x1a->count = 0; 3771 x1a->tbl = (x1node*)malloc( 3772 (sizeof(x1node) + sizeof(x1node*))*1024 ); 3773 if( x1a->tbl==0 ){ 3774 free(x1a); 3775 x1a = 0; 3776 }else{ 3777 int i; 3778 x1a->ht = (x1node**)&(x1a->tbl[1024]); 3779 for(i=0; i<1024; i++) x1a->ht[i] = 0; 3780 } 3781 } 3782 } 3783 /* Insert a new record into the array. Return TRUE if successful. 3784 ** Prior data with the same key is NOT overwritten */ 3785 int Strsafe_insert(data) 3786 char *data; 3787 { 3788 x1node *np; 3789 int h; 3790 int ph; 3791 3792 if( x1a==0 ) return 0; 3793 ph = strhash(data); 3794 h = ph & (x1a->size-1); 3795 np = x1a->ht[h]; 3796 while( np ){ 3797 if( strcmp(np->data,data)==0 ){ 3798 /* An existing entry with the same key is found. */ 3799 /* Fail because overwrite is not allows. */ 3800 return 0; 3801 } 3802 np = np->next; 3803 } 3804 if( x1a->count>=x1a->size ){ 3805 /* Need to make the hash table bigger */ 3806 int i,size; 3807 struct s_x1 array; 3808 array.size = size = x1a->size*2; 3809 array.count = x1a->count; 3810 array.tbl = (x1node*)malloc( 3811 (sizeof(x1node) + sizeof(x1node*))*size ); 3812 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 3813 array.ht = (x1node**)&(array.tbl[size]); 3814 for(i=0; i<size; i++) array.ht[i] = 0; 3815 for(i=0; i<x1a->count; i++){ 3816 x1node *oldnp, *newnp; 3817 oldnp = &(x1a->tbl[i]); 3818 h = strhash(oldnp->data) & (size-1); 3819 newnp = &(array.tbl[i]); 3820 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 3821 newnp->next = array.ht[h]; 3822 newnp->data = oldnp->data; 3823 newnp->from = &(array.ht[h]); 3824 array.ht[h] = newnp; 3825 } 3826 free(x1a->tbl); 3827 *x1a = array; 3828 } 3829 /* Insert the new data */ 3830 h = ph & (x1a->size-1); 3831 np = &(x1a->tbl[x1a->count++]); 3832 np->data = data; 3833 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); 3834 np->next = x1a->ht[h]; 3835 x1a->ht[h] = np; 3836 np->from = &(x1a->ht[h]); 3837 return 1; 3838 } 3839 3840 /* Return a pointer to data assigned to the given key. Return NULL 3841 ** if no such key. */ 3842 char *Strsafe_find(key) 3843 char *key; 3844 { 3845 int h; 3846 x1node *np; 3847 3848 if( x1a==0 ) return 0; 3849 h = strhash(key) & (x1a->size-1); 3850 np = x1a->ht[h]; 3851 while( np ){ 3852 if( strcmp(np->data,key)==0 ) break; 3853 np = np->next; 3854 } 3855 return np ? np->data : 0; 3856 } 3857 3858 /* Return a pointer to the (terminal or nonterminal) symbol "x". 3859 ** Create a new symbol if this is the first time "x" has been seen. 3860 */ 3861 struct symbol *Symbol_new(x) 3862 char *x; 3863 { 3864 struct symbol *sp; 3865 3866 sp = Symbol_find(x); 3867 if( sp==0 ){ 3868 sp = (struct symbol *)malloc( sizeof(struct symbol) ); 3869 MemoryCheck(sp); 3870 sp->name = Strsafe(x); 3871 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL; 3872 sp->rule = 0; 3873 sp->fallback = 0; 3874 sp->prec = -1; 3875 sp->assoc = UNK; 3876 sp->firstset = 0; 3877 sp->lambda = B_FALSE; 3878 sp->destructor = 0; 3879 sp->datatype = 0; 3880 Symbol_insert(sp,sp->name); 3881 } 3882 return sp; 3883 } 3884 3885 /* Compare two symbols for working purposes 3886 ** 3887 ** Symbols that begin with upper case letters (terminals or tokens) 3888 ** must sort before symbols that begin with lower case letters 3889 ** (non-terminals). Other than that, the order does not matter. 3890 ** 3891 ** We find experimentally that leaving the symbols in their original 3892 ** order (the order they appeared in the grammar file) gives the 3893 ** smallest parser tables in SQLite. 3894 */ 3895 int Symbolcmpp(struct symbol **a, struct symbol **b){ 3896 int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); 3897 int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); 3898 return i1-i2; 3899 } 3900 3901 /* There is one instance of the following structure for each 3902 ** associative array of type "x2". 3903 */ 3904 struct s_x2 { 3905 int size; /* The number of available slots. */ 3906 /* Must be a power of 2 greater than or */ 3907 /* equal to 1 */ 3908 int count; /* Number of currently slots filled */ 3909 struct s_x2node *tbl; /* The data stored here */ 3910 struct s_x2node **ht; /* Hash table for lookups */ 3911 }; 3912 3913 /* There is one instance of this structure for every data element 3914 ** in an associative array of type "x2". 3915 */ 3916 typedef struct s_x2node { 3917 struct symbol *data; /* The data */ 3918 char *key; /* The key */ 3919 struct s_x2node *next; /* Next entry with the same hash */ 3920 struct s_x2node **from; /* Previous link */ 3921 } x2node; 3922 3923 /* There is only one instance of the array, which is the following */ 3924 static struct s_x2 *x2a; 3925 3926 /* Allocate a new associative array */ 3927 void Symbol_init(){ 3928 if( x2a ) return; 3929 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); 3930 if( x2a ){ 3931 x2a->size = 128; 3932 x2a->count = 0; 3933 x2a->tbl = (x2node*)malloc( 3934 (sizeof(x2node) + sizeof(x2node*))*128 ); 3935 if( x2a->tbl==0 ){ 3936 free(x2a); 3937 x2a = 0; 3938 }else{ 3939 int i; 3940 x2a->ht = (x2node**)&(x2a->tbl[128]); 3941 for(i=0; i<128; i++) x2a->ht[i] = 0; 3942 } 3943 } 3944 } 3945 /* Insert a new record into the array. Return TRUE if successful. 3946 ** Prior data with the same key is NOT overwritten */ 3947 int Symbol_insert(data,key) 3948 struct symbol *data; 3949 char *key; 3950 { 3951 x2node *np; 3952 int h; 3953 int ph; 3954 3955 if( x2a==0 ) return 0; 3956 ph = strhash(key); 3957 h = ph & (x2a->size-1); 3958 np = x2a->ht[h]; 3959 while( np ){ 3960 if( strcmp(np->key,key)==0 ){ 3961 /* An existing entry with the same key is found. */ 3962 /* Fail because overwrite is not allows. */ 3963 return 0; 3964 } 3965 np = np->next; 3966 } 3967 if( x2a->count>=x2a->size ){ 3968 /* Need to make the hash table bigger */ 3969 int i,size; 3970 struct s_x2 array; 3971 array.size = size = x2a->size*2; 3972 array.count = x2a->count; 3973 array.tbl = (x2node*)malloc( 3974 (sizeof(x2node) + sizeof(x2node*))*size ); 3975 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 3976 array.ht = (x2node**)&(array.tbl[size]); 3977 for(i=0; i<size; i++) array.ht[i] = 0; 3978 for(i=0; i<x2a->count; i++){ 3979 x2node *oldnp, *newnp; 3980 oldnp = &(x2a->tbl[i]); 3981 h = strhash(oldnp->key) & (size-1); 3982 newnp = &(array.tbl[i]); 3983 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 3984 newnp->next = array.ht[h]; 3985 newnp->key = oldnp->key; 3986 newnp->data = oldnp->data; 3987 newnp->from = &(array.ht[h]); 3988 array.ht[h] = newnp; 3989 } 3990 free(x2a->tbl); 3991 *x2a = array; 3992 } 3993 /* Insert the new data */ 3994 h = ph & (x2a->size-1); 3995 np = &(x2a->tbl[x2a->count++]); 3996 np->key = key; 3997 np->data = data; 3998 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); 3999 np->next = x2a->ht[h]; 4000 x2a->ht[h] = np; 4001 np->from = &(x2a->ht[h]); 4002 return 1; 4003 } 4004 4005 /* Return a pointer to data assigned to the given key. Return NULL 4006 ** if no such key. */ 4007 struct symbol *Symbol_find(key) 4008 char *key; 4009 { 4010 int h; 4011 x2node *np; 4012 4013 if( x2a==0 ) return 0; 4014 h = strhash(key) & (x2a->size-1); 4015 np = x2a->ht[h]; 4016 while( np ){ 4017 if( strcmp(np->key,key)==0 ) break; 4018 np = np->next; 4019 } 4020 return np ? np->data : 0; 4021 } 4022 4023 /* Return the n-th data. Return NULL if n is out of range. */ 4024 struct symbol *Symbol_Nth(n) 4025 int n; 4026 { 4027 struct symbol *data; 4028 if( x2a && n>0 && n<=x2a->count ){ 4029 data = x2a->tbl[n-1].data; 4030 }else{ 4031 data = 0; 4032 } 4033 return data; 4034 } 4035 4036 /* Return the size of the array */ 4037 int Symbol_count() 4038 { 4039 return x2a ? x2a->count : 0; 4040 } 4041 4042 /* Return an array of pointers to all data in the table. 4043 ** The array is obtained from malloc. Return NULL if memory allocation 4044 ** problems, or if the array is empty. */ 4045 struct symbol **Symbol_arrayof() 4046 { 4047 struct symbol **array; 4048 int i,size; 4049 if( x2a==0 ) return 0; 4050 size = x2a->count; 4051 array = (struct symbol **)malloc( sizeof(struct symbol *)*size ); 4052 if( array ){ 4053 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data; 4054 } 4055 return array; 4056 } 4057 4058 /* Compare two configurations */ 4059 int Configcmp(a,b) 4060 struct config *a; 4061 struct config *b; 4062 { 4063 int x; 4064 x = a->rp->index - b->rp->index; 4065 if( x==0 ) x = a->dot - b->dot; 4066 return x; 4067 } 4068 4069 /* Compare two states */ 4070 PRIVATE int statecmp(a,b) 4071 struct config *a; 4072 struct config *b; 4073 { 4074 int rc; 4075 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ 4076 rc = a->rp->index - b->rp->index; 4077 if( rc==0 ) rc = a->dot - b->dot; 4078 } 4079 if( rc==0 ){ 4080 if( a ) rc = 1; 4081 if( b ) rc = -1; 4082 } 4083 return rc; 4084 } 4085 4086 /* Hash a state */ 4087 PRIVATE int statehash(a) 4088 struct config *a; 4089 { 4090 int h=0; 4091 while( a ){ 4092 h = h*571 + a->rp->index*37 + a->dot; 4093 a = a->bp; 4094 } 4095 return h; 4096 } 4097 4098 /* Allocate a new state structure */ 4099 struct state *State_new() 4100 { 4101 struct state *new; 4102 new = (struct state *)malloc( sizeof(struct state) ); 4103 MemoryCheck(new); 4104 return new; 4105 } 4106 4107 /* There is one instance of the following structure for each 4108 ** associative array of type "x3". 4109 */ 4110 struct s_x3 { 4111 int size; /* The number of available slots. */ 4112 /* Must be a power of 2 greater than or */ 4113 /* equal to 1 */ 4114 int count; /* Number of currently slots filled */ 4115 struct s_x3node *tbl; /* The data stored here */ 4116 struct s_x3node **ht; /* Hash table for lookups */ 4117 }; 4118 4119 /* There is one instance of this structure for every data element 4120 ** in an associative array of type "x3". 4121 */ 4122 typedef struct s_x3node { 4123 struct state *data; /* The data */ 4124 struct config *key; /* The key */ 4125 struct s_x3node *next; /* Next entry with the same hash */ 4126 struct s_x3node **from; /* Previous link */ 4127 } x3node; 4128 4129 /* There is only one instance of the array, which is the following */ 4130 static struct s_x3 *x3a; 4131 4132 /* Allocate a new associative array */ 4133 void State_init(){ 4134 if( x3a ) return; 4135 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); 4136 if( x3a ){ 4137 x3a->size = 128; 4138 x3a->count = 0; 4139 x3a->tbl = (x3node*)malloc( 4140 (sizeof(x3node) + sizeof(x3node*))*128 ); 4141 if( x3a->tbl==0 ){ 4142 free(x3a); 4143 x3a = 0; 4144 }else{ 4145 int i; 4146 x3a->ht = (x3node**)&(x3a->tbl[128]); 4147 for(i=0; i<128; i++) x3a->ht[i] = 0; 4148 } 4149 } 4150 } 4151 /* Insert a new record into the array. Return TRUE if successful. 4152 ** Prior data with the same key is NOT overwritten */ 4153 int State_insert(data,key) 4154 struct state *data; 4155 struct config *key; 4156 { 4157 x3node *np; 4158 int h; 4159 int ph; 4160 4161 if( x3a==0 ) return 0; 4162 ph = statehash(key); 4163 h = ph & (x3a->size-1); 4164 np = x3a->ht[h]; 4165 while( np ){ 4166 if( statecmp(np->key,key)==0 ){ 4167 /* An existing entry with the same key is found. */ 4168 /* Fail because overwrite is not allows. */ 4169 return 0; 4170 } 4171 np = np->next; 4172 } 4173 if( x3a->count>=x3a->size ){ 4174 /* Need to make the hash table bigger */ 4175 int i,size; 4176 struct s_x3 array; 4177 array.size = size = x3a->size*2; 4178 array.count = x3a->count; 4179 array.tbl = (x3node*)malloc( 4180 (sizeof(x3node) + sizeof(x3node*))*size ); 4181 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4182 array.ht = (x3node**)&(array.tbl[size]); 4183 for(i=0; i<size; i++) array.ht[i] = 0; 4184 for(i=0; i<x3a->count; i++){ 4185 x3node *oldnp, *newnp; 4186 oldnp = &(x3a->tbl[i]); 4187 h = statehash(oldnp->key) & (size-1); 4188 newnp = &(array.tbl[i]); 4189 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4190 newnp->next = array.ht[h]; 4191 newnp->key = oldnp->key; 4192 newnp->data = oldnp->data; 4193 newnp->from = &(array.ht[h]); 4194 array.ht[h] = newnp; 4195 } 4196 free(x3a->tbl); 4197 *x3a = array; 4198 } 4199 /* Insert the new data */ 4200 h = ph & (x3a->size-1); 4201 np = &(x3a->tbl[x3a->count++]); 4202 np->key = key; 4203 np->data = data; 4204 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); 4205 np->next = x3a->ht[h]; 4206 x3a->ht[h] = np; 4207 np->from = &(x3a->ht[h]); 4208 return 1; 4209 } 4210 4211 /* Return a pointer to data assigned to the given key. Return NULL 4212 ** if no such key. */ 4213 struct state *State_find(key) 4214 struct config *key; 4215 { 4216 int h; 4217 x3node *np; 4218 4219 if( x3a==0 ) return 0; 4220 h = statehash(key) & (x3a->size-1); 4221 np = x3a->ht[h]; 4222 while( np ){ 4223 if( statecmp(np->key,key)==0 ) break; 4224 np = np->next; 4225 } 4226 return np ? np->data : 0; 4227 } 4228 4229 /* Return an array of pointers to all data in the table. 4230 ** The array is obtained from malloc. Return NULL if memory allocation 4231 ** problems, or if the array is empty. */ 4232 struct state **State_arrayof() 4233 { 4234 struct state **array; 4235 int i,size; 4236 if( x3a==0 ) return 0; 4237 size = x3a->count; 4238 array = (struct state **)malloc( sizeof(struct state *)*size ); 4239 if( array ){ 4240 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data; 4241 } 4242 return array; 4243 } 4244 4245 /* Hash a configuration */ 4246 PRIVATE int confighash(a) 4247 struct config *a; 4248 { 4249 int h=0; 4250 h = h*571 + a->rp->index*37 + a->dot; 4251 return h; 4252 } 4253 4254 /* There is one instance of the following structure for each 4255 ** associative array of type "x4". 4256 */ 4257 struct s_x4 { 4258 int size; /* The number of available slots. */ 4259 /* Must be a power of 2 greater than or */ 4260 /* equal to 1 */ 4261 int count; /* Number of currently slots filled */ 4262 struct s_x4node *tbl; /* The data stored here */ 4263 struct s_x4node **ht; /* Hash table for lookups */ 4264 }; 4265 4266 /* There is one instance of this structure for every data element 4267 ** in an associative array of type "x4". 4268 */ 4269 typedef struct s_x4node { 4270 struct config *data; /* The data */ 4271 struct s_x4node *next; /* Next entry with the same hash */ 4272 struct s_x4node **from; /* Previous link */ 4273 } x4node; 4274 4275 /* There is only one instance of the array, which is the following */ 4276 static struct s_x4 *x4a; 4277 4278 /* Allocate a new associative array */ 4279 void Configtable_init(){ 4280 if( x4a ) return; 4281 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); 4282 if( x4a ){ 4283 x4a->size = 64; 4284 x4a->count = 0; 4285 x4a->tbl = (x4node*)malloc( 4286 (sizeof(x4node) + sizeof(x4node*))*64 ); 4287 if( x4a->tbl==0 ){ 4288 free(x4a); 4289 x4a = 0; 4290 }else{ 4291 int i; 4292 x4a->ht = (x4node**)&(x4a->tbl[64]); 4293 for(i=0; i<64; i++) x4a->ht[i] = 0; 4294 } 4295 } 4296 } 4297 /* Insert a new record into the array. Return TRUE if successful. 4298 ** Prior data with the same key is NOT overwritten */ 4299 int Configtable_insert(data) 4300 struct config *data; 4301 { 4302 x4node *np; 4303 int h; 4304 int ph; 4305 4306 if( x4a==0 ) return 0; 4307 ph = confighash(data); 4308 h = ph & (x4a->size-1); 4309 np = x4a->ht[h]; 4310 while( np ){ 4311 if( Configcmp(np->data,data)==0 ){ 4312 /* An existing entry with the same key is found. */ 4313 /* Fail because overwrite is not allows. */ 4314 return 0; 4315 } 4316 np = np->next; 4317 } 4318 if( x4a->count>=x4a->size ){ 4319 /* Need to make the hash table bigger */ 4320 int i,size; 4321 struct s_x4 array; 4322 array.size = size = x4a->size*2; 4323 array.count = x4a->count; 4324 array.tbl = (x4node*)malloc( 4325 (sizeof(x4node) + sizeof(x4node*))*size ); 4326 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4327 array.ht = (x4node**)&(array.tbl[size]); 4328 for(i=0; i<size; i++) array.ht[i] = 0; 4329 for(i=0; i<x4a->count; i++){ 4330 x4node *oldnp, *newnp; 4331 oldnp = &(x4a->tbl[i]); 4332 h = confighash(oldnp->data) & (size-1); 4333 newnp = &(array.tbl[i]); 4334 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4335 newnp->next = array.ht[h]; 4336 newnp->data = oldnp->data; 4337 newnp->from = &(array.ht[h]); 4338 array.ht[h] = newnp; 4339 } 4340 free(x4a->tbl); 4341 *x4a = array; 4342 } 4343 /* Insert the new data */ 4344 h = ph & (x4a->size-1); 4345 np = &(x4a->tbl[x4a->count++]); 4346 np->data = data; 4347 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); 4348 np->next = x4a->ht[h]; 4349 x4a->ht[h] = np; 4350 np->from = &(x4a->ht[h]); 4351 return 1; 4352 } 4353 4354 /* Return a pointer to data assigned to the given key. Return NULL 4355 ** if no such key. */ 4356 struct config *Configtable_find(key) 4357 struct config *key; 4358 { 4359 int h; 4360 x4node *np; 4361 4362 if( x4a==0 ) return 0; 4363 h = confighash(key) & (x4a->size-1); 4364 np = x4a->ht[h]; 4365 while( np ){ 4366 if( Configcmp(np->data,key)==0 ) break; 4367 np = np->next; 4368 } 4369 return np ? np->data : 0; 4370 } 4371 4372 /* Remove all data from the table. Pass each data to the function "f" 4373 ** as it is removed. ("f" may be null to avoid this step.) */ 4374 void Configtable_clear(f) 4375 int(*f)(/* struct config * */); 4376 { 4377 int i; 4378 if( x4a==0 || x4a->count==0 ) return; 4379 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data); 4380 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0; 4381 x4a->count = 0; 4382 return; 4383 } 4384