1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * The parser for bc. 33 * 34 */ 35 36 #if BC_ENABLED 37 38 #include <assert.h> 39 #include <stdbool.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include <setjmp.h> 44 45 #include <bc.h> 46 #include <num.h> 47 #include <vm.h> 48 49 // Before you embark on trying to understand this code, have you read the 50 // Development manual (manuals/development.md) and the comment in include/bc.h 51 // yet? No? Do that first. I'm serious. 52 // 53 // The reason is because this file holds the most sensitive and finicky code in 54 // the entire codebase. Even getting history to work on Windows was nothing 55 // compared to this. This is where dreams go to die, where dragons live, and 56 // from which Ken Thompson himself would flee. 57 58 static void 59 bc_parse_else(BcParse* p); 60 61 static void 62 bc_parse_stmt(BcParse* p); 63 64 static BcParseStatus 65 bc_parse_expr_err(BcParse* p, uint8_t flags, BcParseNext next); 66 67 static void 68 bc_parse_expr_status(BcParse* p, uint8_t flags, BcParseNext next); 69 70 /** 71 * Returns true if an instruction could only have come from a "leaf" expression. 72 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF(). 73 * @param t The instruction to test. 74 */ 75 static bool 76 bc_parse_inst_isLeaf(BcInst t) 77 { 78 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) || 79 #if BC_ENABLE_EXTRA_MATH 80 t == BC_INST_TRUNC || 81 #endif // BC_ENABLE_EXTRA_MATH 82 t <= BC_INST_DEC; 83 } 84 85 /** 86 * Returns true if the *previous* token was a delimiter. A delimiter is anything 87 * that can legally end a statement. In bc's case, it could be a newline, a 88 * semicolon, and a brace in certain cases. 89 * @param p The parser. 90 * @return True if the token is a legal delimiter. 91 */ 92 static bool 93 bc_parse_isDelimiter(const BcParse* p) 94 { 95 BcLexType t = p->l.t; 96 bool good; 97 98 // If it's an obvious delimiter, say so. 99 if (BC_PARSE_DELIMITER(t)) return true; 100 101 good = false; 102 103 // If the current token is a keyword, then...beware. That means that we need 104 // to check for a "dangling" else, where there was no brace-delimited block 105 // on the previous if. 106 if (t == BC_LEX_KW_ELSE) 107 { 108 size_t i; 109 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE; 110 111 // As long as going up the stack is valid for a dangling else, keep on. 112 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) 113 { 114 fptr = bc_vec_item_rev(&p->flags, i); 115 flags = *fptr; 116 117 // If we need a brace and don't have one, then we don't have a 118 // delimiter. 119 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE) 120 { 121 return false; 122 } 123 } 124 125 // Oh, and we had also better have an if statement somewhere. 126 good = ((flags & BC_PARSE_FLAG_IF) != 0); 127 } 128 else if (t == BC_LEX_RBRACE) 129 { 130 size_t i; 131 132 // Since we have a brace, we need to just check if a brace was needed. 133 for (i = 0; !good && i < p->flags.len; ++i) 134 { 135 uint16_t* fptr = bc_vec_item_rev(&p->flags, i); 136 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0); 137 } 138 } 139 140 return good; 141 } 142 143 /** 144 * Returns true if we are in top level of a function body. The POSIX grammar 145 * is defined such that anything is allowed after a function body, so we must 146 * use this function to detect that case when ending a function body. 147 * @param p The parser. 148 * @return True if we are in the top level of parsing a function body. 149 */ 150 static bool 151 bc_parse_TopFunc(const BcParse* p) 152 { 153 bool good = p->flags.len == 2; 154 155 uint16_t val = BC_PARSE_FLAG_BRACE | BC_PARSE_FLAG_FUNC_INNER; 156 val |= BC_PARSE_FLAG_FUNC; 157 158 return good && BC_PARSE_TOP_FLAG(p) == val; 159 } 160 161 /** 162 * Sets a previously defined exit label. What are labels? See the bc Parsing 163 * section of the Development manual (manuals/development.md). 164 * @param p The parser. 165 */ 166 static void 167 bc_parse_setLabel(BcParse* p) 168 { 169 BcFunc* func = p->func; 170 BcInstPtr* ip = bc_vec_top(&p->exits); 171 size_t* label; 172 173 assert(func == bc_vec_item(&p->prog->fns, p->fidx)); 174 175 // Set the preallocated label to the correct index. 176 label = bc_vec_item(&func->labels, ip->idx); 177 *label = func->code.len; 178 179 // Now, we don't need the exit label; it is done. 180 bc_vec_pop(&p->exits); 181 } 182 183 /** 184 * Creates a label and sets it to idx. If this is an exit label, then idx is 185 * actually invalid, but it doesn't matter because it will be fixed by 186 * bc_parse_setLabel() later. 187 * @param p The parser. 188 * @param idx The index of the label. 189 */ 190 static void 191 bc_parse_createLabel(BcParse* p, size_t idx) 192 { 193 bc_vec_push(&p->func->labels, &idx); 194 } 195 196 /** 197 * Creates a conditional label. Unlike an exit label, this label is set at 198 * creation time because it comes *before* the code that will target it. 199 * @param p The parser. 200 * @param idx The index of the label. 201 */ 202 static void 203 bc_parse_createCondLabel(BcParse* p, size_t idx) 204 { 205 bc_parse_createLabel(p, p->func->code.len); 206 bc_vec_push(&p->conds, &idx); 207 } 208 209 /* 210 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why 211 * create a label to be filled in later? Because exit labels are meant to be 212 * targeted by code that comes *before* the label. Since we have to parse that 213 * code first, and don't know how long it will be, we need to just make sure to 214 * reserve a slot to be filled in later when we know. 215 * 216 * By the way, this uses BcInstPtr because it was convenient. The field idx 217 * holds the index, and the field func holds the loop boolean. 218 * 219 * @param p The parser. 220 * @param idx The index of the label's position. 221 * @param loop True if the exit label is for a loop or not. 222 */ 223 static void 224 bc_parse_createExitLabel(BcParse* p, size_t idx, bool loop) 225 { 226 BcInstPtr ip; 227 228 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx)); 229 230 ip.func = loop; 231 ip.idx = idx; 232 ip.len = 0; 233 234 bc_vec_push(&p->exits, &ip); 235 bc_parse_createLabel(p, SIZE_MAX); 236 } 237 238 /** 239 * Pops the correct operators off of the operator stack based on the current 240 * operator. This is because of the Shunting-Yard algorithm. Lower prec means 241 * higher precedence. 242 * @param p The parser. 243 * @param type The operator. 244 * @param start The previous start of the operator stack. For more 245 * information, see the bc Parsing section of the Development 246 * manual (manuals/development.md). 247 * @param nexprs A pointer to the current number of expressions that have not 248 * been consumed yet. This is an IN and OUT parameter. 249 */ 250 static void 251 bc_parse_operator(BcParse* p, BcLexType type, size_t start, size_t* nexprs) 252 { 253 BcLexType t; 254 uchar l, r = BC_PARSE_OP_PREC(type); 255 uchar left = BC_PARSE_OP_LEFT(type); 256 257 // While we haven't hit the stop point yet. 258 while (p->ops.len > start) 259 { 260 // Get the top operator. 261 t = BC_PARSE_TOP_OP(p); 262 263 // If it's a right paren, we have reached the end of whatever expression 264 // this is no matter what. 265 if (t == BC_LEX_LPAREN) break; 266 267 // Break for precedence. Precedence operates differently on left and 268 // right associativity, by the way. A left associative operator that 269 // matches the current precedence should take priority, but a right 270 // associative operator should not. 271 l = BC_PARSE_OP_PREC(t); 272 if (l >= r && (l != r || !left)) break; 273 274 // Do the housekeeping. In particular, make sure to note that one 275 // expression was consumed. (Two were, but another was added.) 276 bc_parse_push(p, BC_PARSE_TOKEN_INST(t)); 277 bc_vec_pop(&p->ops); 278 *nexprs -= !BC_PARSE_OP_PREFIX(t); 279 } 280 281 bc_vec_push(&p->ops, &type); 282 } 283 284 /** 285 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on 286 * the operator stack. But before that, it needs to consume whatever operators 287 * there are until it hits a left paren. 288 * @param p The parser. 289 * @param nexprs A pointer to the current number of expressions that have not 290 * been consumed yet. This is an IN and OUT parameter. 291 */ 292 static void 293 bc_parse_rightParen(BcParse* p, size_t* nexprs) 294 { 295 BcLexType top; 296 297 // Consume operators until a left paren. 298 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) 299 { 300 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 301 bc_vec_pop(&p->ops); 302 *nexprs -= !BC_PARSE_OP_PREFIX(top); 303 } 304 305 // We need to pop the left paren as well. 306 bc_vec_pop(&p->ops); 307 308 // Oh, and we also want the next token. 309 bc_lex_next(&p->l); 310 } 311 312 /** 313 * Parses function arguments. 314 * @param p The parser. 315 * @param flags Flags restricting what kind of expressions the arguments can 316 * be. 317 */ 318 static void 319 bc_parse_args(BcParse* p, uint8_t flags) 320 { 321 bool comma = false; 322 size_t nargs; 323 324 bc_lex_next(&p->l); 325 326 // Print and comparison operators not allowed. Well, comparison operators 327 // only for POSIX. But we do allow arrays, and we *must* get a value. 328 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 329 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL); 330 331 // Count the arguments and parse them. 332 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) 333 { 334 bc_parse_expr_status(p, flags, bc_parse_next_arg); 335 336 comma = (p->l.t == BC_LEX_COMMA); 337 if (comma) bc_lex_next(&p->l); 338 } 339 340 // An ending comma is FAIL. 341 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 342 343 // Now do the call with the number of arguments. 344 bc_parse_push(p, BC_INST_CALL); 345 bc_parse_pushIndex(p, nargs); 346 } 347 348 /** 349 * Parses a function call. 350 * @param p The parser. 351 * @param flags Flags restricting what kind of expressions the arguments can 352 * be. 353 */ 354 static void 355 bc_parse_call(BcParse* p, const char* name, uint8_t flags) 356 { 357 size_t idx; 358 359 bc_parse_args(p, flags); 360 361 // We just assert this because bc_parse_args() should 362 // ensure that the next token is what it should be. 363 assert(p->l.t == BC_LEX_RPAREN); 364 365 // We cannot use bc_program_insertFunc() here 366 // because it will overwrite an existing function. 367 idx = bc_map_index(&p->prog->fn_map, name); 368 369 // The function does not exist yet. Create a space for it. If the user does 370 // not define it, it's a *runtime* error, not a parse error. 371 if (idx == BC_VEC_INVALID_IDX) 372 { 373 idx = bc_program_insertFunc(p->prog, name); 374 375 assert(idx != BC_VEC_INVALID_IDX); 376 377 // Make sure that this pointer was not invalidated. 378 p->func = bc_vec_item(&p->prog->fns, p->fidx); 379 } 380 // The function exists, so set the right function index. 381 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx; 382 383 bc_parse_pushIndex(p, idx); 384 385 // Make sure to get the next token. 386 bc_lex_next(&p->l); 387 } 388 389 /** 390 * Parses a name/identifier-based expression. It could be a variable, an array 391 * element, an array itself (for function arguments), a function call, etc. 392 * 393 */ 394 static void 395 bc_parse_name(BcParse* p, BcInst* type, bool* can_assign, uint8_t flags) 396 { 397 char* name; 398 399 BC_SIG_ASSERT_LOCKED; 400 401 // We want a copy of the name since the lexer might overwrite its copy. 402 name = bc_vm_strdup(p->l.str.v); 403 404 BC_SETJMP_LOCKED(err); 405 406 // We need the next token to see if it's just a variable or something more. 407 bc_lex_next(&p->l); 408 409 // Array element or array. 410 if (p->l.t == BC_LEX_LBRACKET) 411 { 412 bc_lex_next(&p->l); 413 414 // Array only. This has to be a function parameter. 415 if (p->l.t == BC_LEX_RBRACKET) 416 { 417 // Error if arrays are not allowed. 418 if (BC_ERR(!(flags & BC_PARSE_ARRAY))) 419 { 420 bc_parse_err(p, BC_ERR_PARSE_EXPR); 421 } 422 423 *type = BC_INST_ARRAY; 424 *can_assign = false; 425 } 426 else 427 { 428 // If we are here, we have an array element. We need to set the 429 // expression parsing flags. 430 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) | 431 BC_PARSE_NEEDVAL; 432 433 bc_parse_expr_status(p, flags2, bc_parse_next_elem); 434 435 // The next token *must* be a right bracket. 436 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 437 { 438 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 439 } 440 441 *type = BC_INST_ARRAY_ELEM; 442 *can_assign = true; 443 } 444 445 // Make sure to get the next token. 446 bc_lex_next(&p->l); 447 448 // Push the instruction and the name of the identifier. 449 bc_parse_push(p, *type); 450 bc_parse_pushName(p, name, false); 451 } 452 else if (p->l.t == BC_LEX_LPAREN) 453 { 454 // We are parsing a function call; error if not allowed. 455 if (BC_ERR(flags & BC_PARSE_NOCALL)) 456 { 457 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 458 } 459 460 *type = BC_INST_CALL; 461 *can_assign = false; 462 463 bc_parse_call(p, name, flags); 464 } 465 else 466 { 467 // Just a variable. 468 *type = BC_INST_VAR; 469 *can_assign = true; 470 bc_parse_push(p, BC_INST_VAR); 471 bc_parse_pushName(p, name, true); 472 } 473 474 err: 475 // Need to make sure to unallocate the name. 476 free(name); 477 BC_LONGJMP_CONT; 478 BC_SIG_MAYLOCK; 479 } 480 481 /** 482 * Parses a builtin function that takes no arguments. This includes read(), 483 * rand(), maxibase(), maxobase(), maxscale(), and maxrand(). 484 * @param p The parser. 485 * @param inst The instruction corresponding to the builtin. 486 */ 487 static void 488 bc_parse_noArgBuiltin(BcParse* p, BcInst inst) 489 { 490 // Must have a left paren. 491 bc_lex_next(&p->l); 492 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 493 494 // Must have a right paren. 495 bc_lex_next(&p->l); 496 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 497 498 bc_parse_push(p, inst); 499 500 bc_lex_next(&p->l); 501 } 502 503 /** 504 * Parses a builtin function that takes 1 argument. This includes length(), 505 * sqrt(), abs(), scale(), and irand(). 506 * @param p The parser. 507 * @param type The lex token. 508 * @param flags The expression parsing flags for parsing the argument. 509 * @param prev An out parameter; the previous instruction pointer. 510 */ 511 static void 512 bc_parse_builtin(BcParse* p, BcLexType type, uint8_t flags, BcInst* prev) 513 { 514 // Must have a left paren. 515 bc_lex_next(&p->l); 516 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 517 518 bc_lex_next(&p->l); 519 520 // Change the flags as needed for parsing the argument. 521 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 522 flags |= BC_PARSE_NEEDVAL; 523 524 // Since length can take arrays, we need to specially add that flag. 525 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY; 526 527 bc_parse_expr_status(p, flags, bc_parse_next_rel); 528 529 // Must have a right paren. 530 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 531 532 // Adjust previous based on the token and push it. 533 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH; 534 bc_parse_push(p, *prev); 535 536 bc_lex_next(&p->l); 537 } 538 539 /** 540 * Parses a builtin function that takes 3 arguments. This includes modexp() and 541 * divmod(). 542 */ 543 static void 544 bc_parse_builtin3(BcParse* p, BcLexType type, uint8_t flags, BcInst* prev) 545 { 546 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD); 547 548 // Must have a left paren. 549 bc_lex_next(&p->l); 550 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 551 552 bc_lex_next(&p->l); 553 554 // Change the flags as needed for parsing the argument. 555 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 556 flags |= BC_PARSE_NEEDVAL; 557 558 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 559 560 // Must have a comma. 561 if (BC_ERR(p->l.t != BC_LEX_COMMA)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 562 563 bc_lex_next(&p->l); 564 565 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 566 567 // Must have a comma. 568 if (BC_ERR(p->l.t != BC_LEX_COMMA)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 569 570 bc_lex_next(&p->l); 571 572 // If it is a divmod, parse an array name. Otherwise, just parse another 573 // expression. 574 if (type == BC_LEX_KW_DIVMOD) 575 { 576 // Must have a name. 577 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 578 579 // This is safe because the next token should not overwrite the name. 580 bc_lex_next(&p->l); 581 582 // Must have a left bracket. 583 if (BC_ERR(p->l.t != BC_LEX_LBRACKET)) 584 { 585 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 586 } 587 588 // This is safe because the next token should not overwrite the name. 589 bc_lex_next(&p->l); 590 591 // Must have a right bracket. 592 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 593 { 594 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 595 } 596 597 // This is safe because the next token should not overwrite the name. 598 bc_lex_next(&p->l); 599 } 600 else bc_parse_expr_status(p, flags, bc_parse_next_rel); 601 602 // Must have a right paren. 603 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 604 605 // Adjust previous based on the token and push it. 606 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP; 607 bc_parse_push(p, *prev); 608 609 // If we have divmod, we need to assign the modulus to the array element, so 610 // we need to push the instructions for doing so. 611 if (type == BC_LEX_KW_DIVMOD) 612 { 613 // The zeroth element. 614 bc_parse_push(p, BC_INST_ZERO); 615 bc_parse_push(p, BC_INST_ARRAY_ELEM); 616 617 // Push the array. 618 bc_parse_pushName(p, p->l.str.v, false); 619 620 // Swap them and assign. After this, the top item on the stack should 621 // be the quotient. 622 bc_parse_push(p, BC_INST_SWAP); 623 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL); 624 } 625 626 bc_lex_next(&p->l); 627 } 628 629 /** 630 * Parses the scale keyword. This is special because scale can be a value or a 631 * builtin function. 632 * @param p The parser. 633 * @param type An out parameter; the instruction for the parse. 634 * @param can_assign An out parameter; whether the expression can be assigned 635 * to. 636 * @param flags The expression parsing flags for parsing a scale() arg. 637 */ 638 static void 639 bc_parse_scale(BcParse* p, BcInst* type, bool* can_assign, uint8_t flags) 640 { 641 bc_lex_next(&p->l); 642 643 // Without the left paren, it's just the keyword. 644 if (p->l.t != BC_LEX_LPAREN) 645 { 646 // Set, push, and return. 647 *type = BC_INST_SCALE; 648 *can_assign = true; 649 bc_parse_push(p, BC_INST_SCALE); 650 return; 651 } 652 653 // Handle the scale function. 654 *type = BC_INST_SCALE_FUNC; 655 *can_assign = false; 656 657 // Once again, adjust the flags. 658 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 659 flags |= BC_PARSE_NEEDVAL; 660 661 bc_lex_next(&p->l); 662 663 bc_parse_expr_status(p, flags, bc_parse_next_rel); 664 665 // Must have a right paren. 666 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 667 668 bc_parse_push(p, BC_INST_SCALE_FUNC); 669 670 bc_lex_next(&p->l); 671 } 672 673 /** 674 * Parses and increment or decrement operator. This is a bit complex. 675 * @param p The parser. 676 * @param prev An out parameter; the previous instruction pointer. 677 * @param can_assign An out parameter; whether the expression can be assigned 678 * to. 679 * @param nexs An in/out parameter; the number of expressions in the 680 * parse tree that are not used. 681 * @param flags The expression parsing flags for parsing a scale() arg. 682 */ 683 static void 684 bc_parse_incdec(BcParse* p, BcInst* prev, bool* can_assign, size_t* nexs, 685 uint8_t flags) 686 { 687 BcLexType type; 688 uchar inst; 689 BcInst etype = *prev; 690 BcLexType last = p->l.last; 691 692 assert(prev != NULL && can_assign != NULL); 693 694 // If we can't assign to the previous token, then we have an error. 695 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC || 696 last == BC_LEX_RPAREN)) 697 { 698 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 699 } 700 701 // Is the previous instruction for a variable? 702 if (BC_PARSE_INST_VAR(etype)) 703 { 704 // If so, this is a postfix operator. 705 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 706 707 // Only postfix uses BC_INST_INC and BC_INST_DEC. 708 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC); 709 bc_parse_push(p, inst); 710 bc_lex_next(&p->l); 711 *can_assign = false; 712 } 713 else 714 { 715 // This is a prefix operator. In that case, we just convert it to 716 // an assignment instruction. 717 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC); 718 719 bc_lex_next(&p->l); 720 type = p->l.t; 721 722 // Because we parse the next part of the expression 723 // right here, we need to increment this. 724 *nexs = *nexs + 1; 725 726 // Is the next token a normal identifier? 727 if (type == BC_LEX_NAME) 728 { 729 // Parse the name. 730 uint8_t flags2 = flags & ~BC_PARSE_ARRAY; 731 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL); 732 } 733 // Is the next token a global? 734 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) 735 { 736 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST); 737 bc_lex_next(&p->l); 738 } 739 // Is the next token specifically scale, which needs special treatment? 740 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) 741 { 742 bc_lex_next(&p->l); 743 744 // Check that scale() was not used. 745 if (BC_ERR(p->l.t == BC_LEX_LPAREN)) 746 { 747 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 748 } 749 else bc_parse_push(p, BC_INST_SCALE); 750 } 751 // Now we know we have an error. 752 else bc_parse_err(p, BC_ERR_PARSE_TOKEN); 753 754 *can_assign = false; 755 756 bc_parse_push(p, BC_INST_ONE); 757 bc_parse_push(p, inst); 758 } 759 } 760 761 /** 762 * Parses the minus operator. This needs special treatment because it is either 763 * subtract or negation. 764 * @param p The parser. 765 * @param prev An in/out parameter; the previous instruction. 766 * @param ops_bgn The size of the operator stack. 767 * @param rparen True if the last token was a right paren. 768 * @param binlast True if the last token was a binary operator. 769 * @param nexprs An in/out parameter; the number of unused expressions. 770 */ 771 static void 772 bc_parse_minus(BcParse* p, BcInst* prev, size_t ops_bgn, bool rparen, 773 bool binlast, size_t* nexprs) 774 { 775 BcLexType type; 776 777 bc_lex_next(&p->l); 778 779 // Figure out if it's a minus or a negation. 780 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG; 781 *prev = BC_PARSE_TOKEN_INST(type); 782 783 // We can just push onto the op stack because this is the largest 784 // precedence operator that gets pushed. Inc/dec does not. 785 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type); 786 else bc_parse_operator(p, type, ops_bgn, nexprs); 787 } 788 789 /** 790 * Parses a string. 791 * @param p The parser. 792 * @param inst The instruction corresponding to how the string was found and 793 * how it should be printed. 794 */ 795 static void 796 bc_parse_str(BcParse* p, BcInst inst) 797 { 798 bc_parse_addString(p); 799 bc_parse_push(p, inst); 800 bc_lex_next(&p->l); 801 } 802 803 /** 804 * Parses a print statement. 805 * @param p The parser. 806 */ 807 static void 808 bc_parse_print(BcParse* p, BcLexType type) 809 { 810 BcLexType t; 811 bool comma = false; 812 BcInst inst = type == BC_LEX_KW_STREAM ? BC_INST_PRINT_STREAM : 813 BC_INST_PRINT_POP; 814 815 bc_lex_next(&p->l); 816 817 t = p->l.t; 818 819 // A print or stream statement has to have *something*. 820 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT); 821 822 do 823 { 824 // If the token is a string, then print it with escapes. 825 // BC_INST_PRINT_POP plays that role for bc. 826 if (t == BC_LEX_STR) bc_parse_str(p, inst); 827 else 828 { 829 // We have an actual number; parse and add a print instruction. 830 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print); 831 bc_parse_push(p, inst); 832 } 833 834 // Is the next token a comma? 835 comma = (p->l.t == BC_LEX_COMMA); 836 837 // Get the next token if we have a comma. 838 if (comma) bc_lex_next(&p->l); 839 else 840 { 841 // If we don't have a comma, the statement needs to end. 842 if (!bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 843 else break; 844 } 845 846 t = p->l.t; 847 } 848 while (true); 849 850 // If we have a comma but no token, that's bad. 851 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 852 } 853 854 /** 855 * Parses a return statement. 856 * @param p The parser. 857 */ 858 static void 859 bc_parse_return(BcParse* p) 860 { 861 BcLexType t; 862 bool paren; 863 uchar inst = BC_INST_RET0; 864 865 // If we are not in a function, that's an error. 866 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 867 868 // If we are in a void function, make sure to return void. 869 if (p->func->voidfn) inst = BC_INST_RET_VOID; 870 871 bc_lex_next(&p->l); 872 873 t = p->l.t; 874 paren = (t == BC_LEX_LPAREN); 875 876 // An empty return statement just needs to push the selected instruction. 877 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst); 878 else 879 { 880 BcParseStatus s; 881 882 // Need to parse the expression whose value will be returned. 883 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr); 884 885 // If the expression was empty, just push the selected instruction. 886 if (s == BC_PARSE_STATUS_EMPTY_EXPR) 887 { 888 bc_parse_push(p, inst); 889 bc_lex_next(&p->l); 890 } 891 892 // POSIX requires parentheses. 893 if (!paren || p->l.last != BC_LEX_RPAREN) 894 { 895 bc_parse_err(p, BC_ERR_POSIX_RET); 896 } 897 898 // Void functions require an empty expression. 899 if (BC_ERR(p->func->voidfn)) 900 { 901 if (s != BC_PARSE_STATUS_EMPTY_EXPR) 902 { 903 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name); 904 } 905 } 906 // If we got here, we want to be sure to end the function with a real 907 // return instruction, just in case. 908 else bc_parse_push(p, BC_INST_RET); 909 } 910 } 911 912 /** 913 * Clears flags that indicate the end of an if statement and its block and sets 914 * the jump location. 915 * @param p The parser. 916 */ 917 static void 918 bc_parse_noElse(BcParse* p) 919 { 920 uint16_t* flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 921 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END)); 922 bc_parse_setLabel(p); 923 } 924 925 /** 926 * Ends (finishes parsing) the body of a control statement or a function. 927 * @param p The parser. 928 * @param brace True if the body was ended by a brace, false otherwise. 929 */ 930 static void 931 bc_parse_endBody(BcParse* p, bool brace) 932 { 933 bool has_brace, new_else = false; 934 935 // We cannot be ending a body if there are no bodies to end. 936 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 937 938 if (brace) 939 { 940 // The brace was already gotten; make sure that the caller did not lie. 941 // We check for the requirement of braces later. 942 assert(p->l.t == BC_LEX_RBRACE); 943 944 bc_lex_next(&p->l); 945 946 // If the next token is not a delimiter, that is a problem. 947 if (BC_ERR(!bc_parse_isDelimiter(p) && !bc_parse_TopFunc(p))) 948 { 949 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 950 } 951 } 952 953 // Do we have a brace flag? 954 has_brace = (BC_PARSE_BRACE(p) != 0); 955 956 do 957 { 958 size_t len = p->flags.len; 959 bool loop; 960 961 // If we have a brace flag but not a brace, that's a problem. 962 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 963 964 // Are we inside a loop? 965 loop = (BC_PARSE_LOOP_INNER(p) != 0); 966 967 // If we are ending a loop or an else... 968 if (loop || BC_PARSE_ELSE(p)) 969 { 970 // Loops have condition labels that we have to take care of as well. 971 if (loop) 972 { 973 size_t* label = bc_vec_top(&p->conds); 974 975 bc_parse_push(p, BC_INST_JUMP); 976 bc_parse_pushIndex(p, *label); 977 978 bc_vec_pop(&p->conds); 979 } 980 981 bc_parse_setLabel(p); 982 bc_vec_pop(&p->flags); 983 } 984 // If we are ending a function... 985 else if (BC_PARSE_FUNC_INNER(p)) 986 { 987 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0); 988 bc_parse_push(p, inst); 989 bc_parse_updateFunc(p, BC_PROG_MAIN); 990 bc_vec_pop(&p->flags); 991 } 992 // If we have a brace flag and not an if statement, we can pop the top 993 // of the flags stack because they have been taken care of above. 994 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags); 995 996 // This needs to be last to parse nested if's properly. 997 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) 998 { 999 // Eat newlines. 1000 while (p->l.t == BC_LEX_NLINE) 1001 { 1002 bc_lex_next(&p->l); 1003 } 1004 1005 // *Now* we can pop the flags. 1006 bc_vec_pop(&p->flags); 1007 1008 // If we are allowed non-POSIX stuff... 1009 if (!BC_S) 1010 { 1011 // Have we found yet another dangling else? 1012 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END; 1013 new_else = (p->l.t == BC_LEX_KW_ELSE); 1014 1015 // Parse the else or end the if statement body. 1016 if (new_else) bc_parse_else(p); 1017 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace)) 1018 { 1019 bc_parse_noElse(p); 1020 } 1021 } 1022 // POSIX requires us to do the bare minimum only. 1023 else bc_parse_noElse(p); 1024 } 1025 1026 // If these are both true, we have "used" the braces that we found. 1027 if (brace && has_brace) brace = false; 1028 } 1029 // This condition was perhaps the hardest single part of the parser. If 1030 // the flags stack does not have enough, we should stop. If we have a 1031 // new else statement, we should stop. If we do have the end of an if 1032 // statement and we have eaten the brace, we should stop. If we do have 1033 // a brace flag, we should stop. 1034 while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) && 1035 !(has_brace = (BC_PARSE_BRACE(p) != 0))); 1036 1037 // If we have a brace, yet no body for it, that's a problem. 1038 if (BC_ERR(p->flags.len == 1 && brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1039 else if (brace && BC_PARSE_BRACE(p)) 1040 { 1041 // If we make it here, we have a brace and a flag for it. 1042 uint16_t flags = BC_PARSE_TOP_FLAG(p); 1043 1044 // This condition ensure that the *last* body is correctly finished by 1045 // popping its flags. 1046 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) && 1047 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) && 1048 !(flags & (BC_PARSE_FLAG_IF_END))) 1049 { 1050 bc_vec_pop(&p->flags); 1051 } 1052 } 1053 } 1054 1055 /** 1056 * Starts the body of a control statement or function. 1057 * @param p The parser. 1058 * @param flags The current flags (will be edited). 1059 */ 1060 static void 1061 bc_parse_startBody(BcParse* p, uint16_t flags) 1062 { 1063 assert(flags); 1064 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP)); 1065 flags |= BC_PARSE_FLAG_BODY; 1066 bc_vec_push(&p->flags, &flags); 1067 } 1068 1069 void 1070 bc_parse_endif(BcParse* p) 1071 { 1072 size_t i; 1073 bool good; 1074 1075 // Not a problem if this is true. 1076 if (BC_NO_ERR(!BC_PARSE_NO_EXEC(p))) return; 1077 1078 good = true; 1079 1080 // Find an instance of a body that needs closing, i.e., a statement that did 1081 // not have a right brace when it should have. 1082 for (i = 0; good && i < p->flags.len; ++i) 1083 { 1084 uint16_t flag = *((uint16_t*) bc_vec_item(&p->flags, i)); 1085 good = ((flag & BC_PARSE_FLAG_BRACE) != BC_PARSE_FLAG_BRACE); 1086 } 1087 1088 // If we did not find such an instance... 1089 if (good) 1090 { 1091 // We set this to restore it later. We don't want the parser thinking 1092 // that we are on stdin for this one because it will want more. 1093 bool is_stdin = vm.is_stdin; 1094 1095 vm.is_stdin = false; 1096 1097 // End all of the if statements and loops. 1098 while (p->flags.len > 1 || BC_PARSE_IF_END(p)) 1099 { 1100 if (BC_PARSE_IF_END(p)) bc_parse_noElse(p); 1101 if (p->flags.len > 1) bc_parse_endBody(p, false); 1102 } 1103 1104 vm.is_stdin = is_stdin; 1105 } 1106 // If we reach here, a block was not properly closed, and we should error. 1107 else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK); 1108 } 1109 1110 /** 1111 * Parses an if statement. 1112 * @param p The parser. 1113 */ 1114 static void 1115 bc_parse_if(BcParse* p) 1116 { 1117 // We are allowed relational operators, and we must have a value. 1118 size_t idx; 1119 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1120 1121 // Get the left paren and barf if necessary. 1122 bc_lex_next(&p->l); 1123 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1124 1125 // Parse the condition. 1126 bc_lex_next(&p->l); 1127 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1128 1129 // Must have a right paren. 1130 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1131 1132 bc_lex_next(&p->l); 1133 1134 // Insert the conditional jump instruction. 1135 bc_parse_push(p, BC_INST_JUMP_ZERO); 1136 1137 idx = p->func->labels.len; 1138 1139 // Push the index for the instruction and create an exit label for an else 1140 // statement. 1141 bc_parse_pushIndex(p, idx); 1142 bc_parse_createExitLabel(p, idx, false); 1143 1144 bc_parse_startBody(p, BC_PARSE_FLAG_IF); 1145 } 1146 1147 /** 1148 * Parses an else statement. 1149 * @param p The parser. 1150 */ 1151 static void 1152 bc_parse_else(BcParse* p) 1153 { 1154 size_t idx = p->func->labels.len; 1155 1156 // We must be at the end of an if statement. 1157 if (BC_ERR(!BC_PARSE_IF_END(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1158 1159 // Push an unconditional jump to make bc jump over the else statement if it 1160 // executed the original if statement. 1161 bc_parse_push(p, BC_INST_JUMP); 1162 bc_parse_pushIndex(p, idx); 1163 1164 // Clear the else stuff. Yes, that function is misnamed for its use here, 1165 // but deal with it. 1166 bc_parse_noElse(p); 1167 1168 // Create the exit label and parse the body. 1169 bc_parse_createExitLabel(p, idx, false); 1170 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE); 1171 1172 bc_lex_next(&p->l); 1173 } 1174 1175 /** 1176 * Parse a while loop. 1177 * @param p The parser. 1178 */ 1179 static void 1180 bc_parse_while(BcParse* p) 1181 { 1182 // We are allowed relational operators, and we must have a value. 1183 size_t idx; 1184 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1185 1186 // Get the left paren and barf if necessary. 1187 bc_lex_next(&p->l); 1188 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1189 bc_lex_next(&p->l); 1190 1191 // Create the labels. Loops need both. 1192 bc_parse_createCondLabel(p, p->func->labels.len); 1193 idx = p->func->labels.len; 1194 bc_parse_createExitLabel(p, idx, true); 1195 1196 // Parse the actual condition and barf on non-right paren. 1197 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1198 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1199 bc_lex_next(&p->l); 1200 1201 // Now we can push the conditional jump and start the body. 1202 bc_parse_push(p, BC_INST_JUMP_ZERO); 1203 bc_parse_pushIndex(p, idx); 1204 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1205 } 1206 1207 /** 1208 * Parse a for loop. 1209 * @param p The parser. 1210 */ 1211 static void 1212 bc_parse_for(BcParse* p) 1213 { 1214 size_t cond_idx, exit_idx, body_idx, update_idx; 1215 1216 // Barf on the missing left paren. 1217 bc_lex_next(&p->l); 1218 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1219 bc_lex_next(&p->l); 1220 1221 // The first statement can be empty, but if it is, check for error in POSIX 1222 // mode. Otherwise, parse it. 1223 if (p->l.t != BC_LEX_SCOLON) bc_parse_expr_status(p, 0, bc_parse_next_for); 1224 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1225 1226 // Must have a semicolon. 1227 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1228 bc_lex_next(&p->l); 1229 1230 // These are indices for labels. There are so many of them because the end 1231 // of the loop must unconditionally jump to the update code. Then the update 1232 // code must unconditionally jump to the condition code. Then the condition 1233 // code must *conditionally* jump to the exit. 1234 cond_idx = p->func->labels.len; 1235 update_idx = cond_idx + 1; 1236 body_idx = update_idx + 1; 1237 exit_idx = body_idx + 1; 1238 1239 // This creates the condition label. 1240 bc_parse_createLabel(p, p->func->code.len); 1241 1242 // Parse an expression if it exists. 1243 if (p->l.t != BC_LEX_SCOLON) 1244 { 1245 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1246 bc_parse_expr_status(p, flags, bc_parse_next_for); 1247 } 1248 else 1249 { 1250 // Set this for the next call to bc_parse_number because an empty 1251 // condition means that it is an infinite loop, so the condition must be 1252 // non-zero. This is safe to set because the current token is a 1253 // semicolon, which has no string requirement. 1254 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one); 1255 bc_parse_number(p); 1256 1257 // An empty condition makes POSIX mad. 1258 bc_parse_err(p, BC_ERR_POSIX_FOR); 1259 } 1260 1261 // Must have a semicolon. 1262 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1263 bc_lex_next(&p->l); 1264 1265 // Now we can set up the conditional jump to the exit and an unconditional 1266 // jump to the body right after. The unconditional jump to the body is 1267 // because there is update code coming right after the condition, so we need 1268 // to skip it to get to the body. 1269 bc_parse_push(p, BC_INST_JUMP_ZERO); 1270 bc_parse_pushIndex(p, exit_idx); 1271 bc_parse_push(p, BC_INST_JUMP); 1272 bc_parse_pushIndex(p, body_idx); 1273 1274 // Now create the label for the update code. 1275 bc_parse_createCondLabel(p, update_idx); 1276 1277 // Parse if not empty, and if it is, let POSIX yell if necessary. 1278 if (p->l.t != BC_LEX_RPAREN) bc_parse_expr_status(p, 0, bc_parse_next_rel); 1279 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1280 1281 // Must have a right paren. 1282 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1283 1284 // Set up a jump to the condition right after the update code. 1285 bc_parse_push(p, BC_INST_JUMP); 1286 bc_parse_pushIndex(p, cond_idx); 1287 bc_parse_createLabel(p, p->func->code.len); 1288 1289 // Create an exit label for the body and start the body. 1290 bc_parse_createExitLabel(p, exit_idx, true); 1291 bc_lex_next(&p->l); 1292 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1293 } 1294 1295 /** 1296 * Parse a statement or token that indicates a loop exit. This includes an 1297 * actual loop exit, the break keyword, or the continue keyword. 1298 * @param p The parser. 1299 * @param type The type of exit. 1300 */ 1301 static void 1302 bc_parse_loopExit(BcParse* p, BcLexType type) 1303 { 1304 size_t i; 1305 BcInstPtr* ip; 1306 1307 // Must have a loop. If we don't, that's an error. 1308 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1309 1310 // If we have a break statement... 1311 if (type == BC_LEX_KW_BREAK) 1312 { 1313 // If there are no exits, something went wrong somewhere. 1314 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1315 1316 // Get the exit. 1317 i = p->exits.len - 1; 1318 ip = bc_vec_item(&p->exits, i); 1319 1320 // The condition !ip->func is true if the exit is not for a loop, so we 1321 // need to find the first actual loop exit. 1322 while (!ip->func && i < p->exits.len) 1323 { 1324 ip = bc_vec_item(&p->exits, i); 1325 i -= 1; 1326 } 1327 1328 // Make sure everything is hunky dory. 1329 assert(ip != NULL && (i < p->exits.len || ip->func)); 1330 1331 // Set the index for the exit. 1332 i = ip->idx; 1333 } 1334 // If we have a continue statement or just the loop end, jump to the 1335 // condition (or update for a foor loop). 1336 else i = *((size_t*) bc_vec_top(&p->conds)); 1337 1338 // Add the unconditional jump. 1339 bc_parse_push(p, BC_INST_JUMP); 1340 bc_parse_pushIndex(p, i); 1341 1342 bc_lex_next(&p->l); 1343 } 1344 1345 /** 1346 * Parse a function (header). 1347 * @param p The parser. 1348 */ 1349 static void 1350 bc_parse_func(BcParse* p) 1351 { 1352 bool comma = false, voidfn; 1353 uint16_t flags; 1354 size_t idx; 1355 1356 bc_lex_next(&p->l); 1357 1358 // Must have a name. 1359 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1360 1361 // If the name is "void", and POSIX is not on, mark as void. 1362 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME && 1363 !strcmp(p->l.str.v, "void")); 1364 1365 // We can safely do this because the expected token should not overwrite the 1366 // function name. 1367 bc_lex_next(&p->l); 1368 1369 // If we *don't* have another name, then void is the name of the function. 1370 voidfn = (voidfn && p->l.t == BC_LEX_NAME); 1371 1372 // With a void function, allow POSIX to complain and get a new token. 1373 if (voidfn) 1374 { 1375 bc_parse_err(p, BC_ERR_POSIX_VOID); 1376 1377 // We can safely do this because the expected token should not overwrite 1378 // the function name. 1379 bc_lex_next(&p->l); 1380 } 1381 1382 // Must have a left paren. 1383 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1384 1385 // Make sure the functions map and vector are synchronized. 1386 assert(p->prog->fns.len == p->prog->fn_map.len); 1387 1388 // Insert the function by name into the map and vector. 1389 idx = bc_program_insertFunc(p->prog, p->l.str.v); 1390 1391 // Make sure the insert worked. 1392 assert(idx); 1393 1394 // Update the function pointer and stuff in the parser and set its void. 1395 bc_parse_updateFunc(p, idx); 1396 p->func->voidfn = voidfn; 1397 1398 bc_lex_next(&p->l); 1399 1400 // While we do not have a right paren, we are still parsing arguments. 1401 while (p->l.t != BC_LEX_RPAREN) 1402 { 1403 BcType t = BC_TYPE_VAR; 1404 1405 // If we have an asterisk, we are parsing a reference argument. 1406 if (p->l.t == BC_LEX_OP_MULTIPLY) 1407 { 1408 t = BC_TYPE_REF; 1409 bc_lex_next(&p->l); 1410 1411 // Let POSIX complain if necessary. 1412 bc_parse_err(p, BC_ERR_POSIX_REF); 1413 } 1414 1415 // If we don't have a name, the argument will not have a name. Barf. 1416 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1417 1418 // Increment the number of parameters. 1419 p->func->nparams += 1; 1420 1421 // Copy the string in the lexer so that we can use the lexer again. 1422 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v); 1423 1424 bc_lex_next(&p->l); 1425 1426 // We are parsing an array parameter if this is true. 1427 if (p->l.t == BC_LEX_LBRACKET) 1428 { 1429 // Set the array type, unless we are already parsing a reference. 1430 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY; 1431 1432 bc_lex_next(&p->l); 1433 1434 // The brackets *must* be empty. 1435 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1436 { 1437 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1438 } 1439 1440 bc_lex_next(&p->l); 1441 } 1442 // If we did *not* get a bracket, but we are expecting a reference, we 1443 // have a problem. 1444 else if (BC_ERR(t == BC_TYPE_REF)) 1445 { 1446 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v); 1447 } 1448 1449 // Test for comma and get the next token if it exists. 1450 comma = (p->l.t == BC_LEX_COMMA); 1451 if (comma) bc_lex_next(&p->l); 1452 1453 // Insert the parameter into the function. 1454 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1455 } 1456 1457 // If we have a comma, but no parameter, barf. 1458 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1459 1460 // Start the body. 1461 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER; 1462 bc_parse_startBody(p, flags); 1463 1464 bc_lex_next(&p->l); 1465 1466 // POSIX requires that a brace be on the same line as the function header. 1467 // If we don't have a brace, let POSIX throw an error. 1468 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE); 1469 } 1470 1471 /** 1472 * Parse an auto list. 1473 * @param p The parser. 1474 */ 1475 static void 1476 bc_parse_auto(BcParse* p) 1477 { 1478 bool comma, one; 1479 1480 // Error if the auto keyword appeared in the wrong place. 1481 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1482 bc_lex_next(&p->l); 1483 1484 p->auto_part = comma = false; 1485 1486 // We need at least one variable or array. 1487 one = (p->l.t == BC_LEX_NAME); 1488 1489 // While we have a variable or array. 1490 while (p->l.t == BC_LEX_NAME) 1491 { 1492 BcType t; 1493 1494 // Copy the name from the lexer, so we can use it again. 1495 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v); 1496 1497 bc_lex_next(&p->l); 1498 1499 // If we are parsing an array... 1500 if (p->l.t == BC_LEX_LBRACKET) 1501 { 1502 t = BC_TYPE_ARRAY; 1503 1504 bc_lex_next(&p->l); 1505 1506 // The brackets *must* be empty. 1507 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1508 { 1509 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1510 } 1511 1512 bc_lex_next(&p->l); 1513 } 1514 else t = BC_TYPE_VAR; 1515 1516 // Test for comma and get the next token if it exists. 1517 comma = (p->l.t == BC_LEX_COMMA); 1518 if (comma) bc_lex_next(&p->l); 1519 1520 // Insert the auto into the function. 1521 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1522 } 1523 1524 // If we have a comma, but no auto, barf. 1525 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1526 1527 // If we don't have any variables or arrays, barf. 1528 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO); 1529 1530 // The auto statement should be all that's in the statement. 1531 if (BC_ERR(!bc_parse_isDelimiter(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1532 } 1533 1534 /** 1535 * Parses a body. 1536 * @param p The parser. 1537 * @param brace True if a brace was encountered, false otherwise. 1538 */ 1539 static void 1540 bc_parse_body(BcParse* p, bool brace) 1541 { 1542 uint16_t* flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 1543 1544 assert(flag_ptr != NULL); 1545 assert(p->flags.len >= 2); 1546 1547 // The body flag is for when we expect a body. We got a body, so clear the 1548 // flag. 1549 *flag_ptr &= ~(BC_PARSE_FLAG_BODY); 1550 1551 // If we are inside a function, that means we just barely entered it, and 1552 // we can expect an auto list. 1553 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) 1554 { 1555 // We *must* have a brace in this case. 1556 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1557 1558 p->auto_part = (p->l.t != BC_LEX_KW_AUTO); 1559 1560 if (!p->auto_part) 1561 { 1562 // Make sure this is true to not get a parse error. 1563 p->auto_part = true; 1564 1565 // Since we already have the auto keyword, parse. 1566 bc_parse_auto(p); 1567 } 1568 1569 // Eat a newline. 1570 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 1571 } 1572 else 1573 { 1574 // This is the easy part. 1575 size_t len = p->flags.len; 1576 1577 assert(*flag_ptr); 1578 1579 // Parse a statement. 1580 bc_parse_stmt(p); 1581 1582 // This is a very important condition to get right. If there is no 1583 // brace, and no body flag, and the flags len hasn't shrunk, then we 1584 // have a body that was not delimited by braces, so we need to end it 1585 // now, after just one statement. 1586 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len) 1587 { 1588 bc_parse_endBody(p, false); 1589 } 1590 } 1591 } 1592 1593 /** 1594 * Parses a statement. This is the entry point for just about everything, except 1595 * function definitions. 1596 * @param p The parser. 1597 */ 1598 static void 1599 bc_parse_stmt(BcParse* p) 1600 { 1601 size_t len; 1602 uint16_t flags; 1603 BcLexType type = p->l.t; 1604 1605 // Eat newline. 1606 if (type == BC_LEX_NLINE) 1607 { 1608 bc_lex_next(&p->l); 1609 return; 1610 } 1611 1612 // Eat auto list. 1613 if (type == BC_LEX_KW_AUTO) 1614 { 1615 bc_parse_auto(p); 1616 return; 1617 } 1618 1619 // If we reach this point, no auto list is allowed. 1620 p->auto_part = false; 1621 1622 // Everything but an else needs to be taken care of here, but else is 1623 // special. 1624 if (type != BC_LEX_KW_ELSE) 1625 { 1626 // After an if, no else found. 1627 if (BC_PARSE_IF_END(p)) 1628 { 1629 // Clear the expectation for else, end body, and return. Returning 1630 // gives us a clean slate for parsing again. 1631 bc_parse_noElse(p); 1632 if (p->flags.len > 1 && !BC_PARSE_BRACE(p)) 1633 { 1634 bc_parse_endBody(p, false); 1635 } 1636 1637 return; 1638 } 1639 // With a left brace, we are parsing a body. 1640 else if (type == BC_LEX_LBRACE) 1641 { 1642 // We need to start a body if we are not expecting one yet. 1643 if (!BC_PARSE_BODY(p)) 1644 { 1645 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE); 1646 bc_lex_next(&p->l); 1647 } 1648 // If we *are* expecting a body, that body should get a brace. This 1649 // takes care of braces being on a different line than if and loop 1650 // headers. 1651 else 1652 { 1653 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE; 1654 bc_lex_next(&p->l); 1655 bc_parse_body(p, true); 1656 } 1657 1658 // If we have reached this point, we need to return for a clean 1659 // slate. 1660 return; 1661 } 1662 // This happens when we are expecting a body and get a single statement, 1663 // i.e., a body with no braces surrounding it. Returns after for a clean 1664 // slate. 1665 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) 1666 { 1667 bc_parse_body(p, false); 1668 return; 1669 } 1670 } 1671 1672 len = p->flags.len; 1673 flags = BC_PARSE_TOP_FLAG(p); 1674 1675 switch (type) 1676 { 1677 // All of these are valid for expressions. 1678 case BC_LEX_OP_INC: 1679 case BC_LEX_OP_DEC: 1680 case BC_LEX_OP_MINUS: 1681 case BC_LEX_OP_BOOL_NOT: 1682 case BC_LEX_LPAREN: 1683 case BC_LEX_NAME: 1684 case BC_LEX_NUMBER: 1685 case BC_LEX_KW_IBASE: 1686 case BC_LEX_KW_LAST: 1687 case BC_LEX_KW_LENGTH: 1688 case BC_LEX_KW_OBASE: 1689 case BC_LEX_KW_SCALE: 1690 #if BC_ENABLE_EXTRA_MATH 1691 case BC_LEX_KW_SEED: 1692 #endif // BC_ENABLE_EXTRA_MATH 1693 case BC_LEX_KW_SQRT: 1694 case BC_LEX_KW_ABS: 1695 #if BC_ENABLE_EXTRA_MATH 1696 case BC_LEX_KW_IRAND: 1697 #endif // BC_ENABLE_EXTRA_MATH 1698 case BC_LEX_KW_ASCIIFY: 1699 case BC_LEX_KW_MODEXP: 1700 case BC_LEX_KW_DIVMOD: 1701 case BC_LEX_KW_READ: 1702 #if BC_ENABLE_EXTRA_MATH 1703 case BC_LEX_KW_RAND: 1704 #endif // BC_ENABLE_EXTRA_MATH 1705 case BC_LEX_KW_MAXIBASE: 1706 case BC_LEX_KW_MAXOBASE: 1707 case BC_LEX_KW_MAXSCALE: 1708 #if BC_ENABLE_EXTRA_MATH 1709 case BC_LEX_KW_MAXRAND: 1710 #endif // BC_ENABLE_EXTRA_MATH 1711 case BC_LEX_KW_LINE_LENGTH: 1712 case BC_LEX_KW_GLOBAL_STACKS: 1713 case BC_LEX_KW_LEADING_ZERO: 1714 { 1715 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr); 1716 break; 1717 } 1718 1719 case BC_LEX_KW_ELSE: 1720 { 1721 bc_parse_else(p); 1722 break; 1723 } 1724 1725 // Just eat. 1726 case BC_LEX_SCOLON: 1727 { 1728 // Do nothing. 1729 break; 1730 } 1731 1732 case BC_LEX_RBRACE: 1733 { 1734 bc_parse_endBody(p, true); 1735 break; 1736 } 1737 1738 case BC_LEX_STR: 1739 { 1740 bc_parse_str(p, BC_INST_PRINT_STR); 1741 break; 1742 } 1743 1744 case BC_LEX_KW_BREAK: 1745 case BC_LEX_KW_CONTINUE: 1746 { 1747 bc_parse_loopExit(p, p->l.t); 1748 break; 1749 } 1750 1751 case BC_LEX_KW_FOR: 1752 { 1753 bc_parse_for(p); 1754 break; 1755 } 1756 1757 case BC_LEX_KW_HALT: 1758 { 1759 bc_parse_push(p, BC_INST_HALT); 1760 bc_lex_next(&p->l); 1761 break; 1762 } 1763 1764 case BC_LEX_KW_IF: 1765 { 1766 bc_parse_if(p); 1767 break; 1768 } 1769 1770 case BC_LEX_KW_LIMITS: 1771 { 1772 // `limits` is a compile-time command, so execute it right away. 1773 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT); 1774 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS); 1775 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW); 1776 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX); 1777 bc_vm_printf("\n"); 1778 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE); 1779 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM); 1780 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE); 1781 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING); 1782 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME); 1783 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM); 1784 #if BC_ENABLE_EXTRA_MATH 1785 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND); 1786 #endif // BC_ENABLE_EXTRA_MATH 1787 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP); 1788 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS); 1789 1790 bc_lex_next(&p->l); 1791 1792 break; 1793 } 1794 1795 case BC_LEX_KW_STREAM: 1796 case BC_LEX_KW_PRINT: 1797 { 1798 bc_parse_print(p, type); 1799 break; 1800 } 1801 1802 case BC_LEX_KW_QUIT: 1803 { 1804 // Quit is a compile-time command. We don't exit directly, so the vm 1805 // can clean up. 1806 vm.status = BC_STATUS_QUIT; 1807 BC_JMP; 1808 break; 1809 } 1810 1811 case BC_LEX_KW_RETURN: 1812 { 1813 bc_parse_return(p); 1814 break; 1815 } 1816 1817 case BC_LEX_KW_WHILE: 1818 { 1819 bc_parse_while(p); 1820 break; 1821 } 1822 1823 default: 1824 { 1825 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1826 } 1827 } 1828 1829 // If the flags did not change, we expect a delimiter. 1830 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) 1831 { 1832 if (BC_ERR(!bc_parse_isDelimiter(p))) 1833 { 1834 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1835 } 1836 } 1837 1838 // Make sure semicolons are eaten. 1839 while (p->l.t == BC_LEX_SCOLON) 1840 { 1841 bc_lex_next(&p->l); 1842 } 1843 1844 // POSIX's grammar does not allow a function definition after a semicolon 1845 // without a newline, so check specifically for that case and error if 1846 // the POSIX standard flag is set. 1847 if (p->l.last == BC_LEX_SCOLON && p->l.t == BC_LEX_KW_DEFINE && BC_IS_POSIX) 1848 { 1849 bc_parse_err(p, BC_ERR_POSIX_FUNC_AFTER_SEMICOLON); 1850 } 1851 } 1852 1853 void 1854 bc_parse_parse(BcParse* p) 1855 { 1856 assert(p); 1857 1858 BC_SETJMP_LOCKED(exit); 1859 1860 // We should not let an EOF get here unless some partial parse was not 1861 // completed, in which case, it's the user's fault. 1862 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF); 1863 1864 // Functions need special parsing. 1865 else if (p->l.t == BC_LEX_KW_DEFINE) 1866 { 1867 if (BC_ERR(BC_PARSE_NO_EXEC(p))) 1868 { 1869 bc_parse_endif(p); 1870 if (BC_ERR(BC_PARSE_NO_EXEC(p))) 1871 { 1872 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1873 } 1874 } 1875 bc_parse_func(p); 1876 } 1877 1878 // Otherwise, parse a normal statement. 1879 else bc_parse_stmt(p); 1880 1881 exit: 1882 1883 // We need to reset on error. 1884 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig))) 1885 { 1886 bc_parse_reset(p); 1887 } 1888 1889 BC_LONGJMP_CONT; 1890 BC_SIG_MAYLOCK; 1891 } 1892 1893 /** 1894 * Parse an expression. This is the actual implementation of the Shunting-Yard 1895 * Algorithm. 1896 * @param p The parser. 1897 * @param flags The flags for what is valid in the expression. 1898 * @param next A set of tokens for what is valid *after* the expression. 1899 * @return A parse status. In some places, an empty expression is an 1900 * error, and sometimes, it is required. This allows this function 1901 * to tell the caller if the expression was empty and let the 1902 * caller handle it. 1903 */ 1904 static BcParseStatus 1905 bc_parse_expr_err(BcParse* p, uint8_t flags, BcParseNext next) 1906 { 1907 BcInst prev = BC_INST_PRINT; 1908 uchar inst = BC_INST_INVALID; 1909 BcLexType top, t; 1910 size_t nexprs, ops_bgn; 1911 uint32_t i, nparens, nrelops; 1912 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign; 1913 1914 // One of these *must* be true. 1915 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL)); 1916 1917 // These are set very carefully. In fact, controlling the values of these 1918 // locals is the biggest part of making this work. ops_bgn especially is 1919 // important because it marks where the operator stack begins for *this* 1920 // invocation of this function. That's because bc_parse_expr_err() is 1921 // recursive (the Shunting-Yard Algorithm is most easily expressed 1922 // recursively when parsing subexpressions), and each invocation needs to 1923 // know where to stop. 1924 // 1925 // - nparens is the number of left parens without matches. 1926 // - nrelops is the number of relational operators that appear in the expr. 1927 // - nexprs is the number of unused expressions. 1928 // - rprn is a right paren encountered last. 1929 // - done means the expression has been fully parsed. 1930 // - get_token is true when a token is needed at the end of an iteration. 1931 // - assign is true when an assignment statement was parsed last. 1932 // - incdec is true when the previous operator was an inc or dec operator. 1933 // - can_assign is true when an assignemnt is valid. 1934 // - bin_last is true when the previous instruction was a binary operator. 1935 t = p->l.t; 1936 pfirst = (p->l.t == BC_LEX_LPAREN); 1937 nparens = nrelops = 0; 1938 nexprs = 0; 1939 ops_bgn = p->ops.len; 1940 rprn = done = get_token = assign = incdec = can_assign = false; 1941 bin_last = true; 1942 1943 // We want to eat newlines if newlines are not a valid ending token. 1944 // This is for spacing in things like for loop headers. 1945 if (!(flags & BC_PARSE_NOREAD)) 1946 { 1947 while ((t = p->l.t) == BC_LEX_NLINE) 1948 { 1949 bc_lex_next(&p->l); 1950 } 1951 } 1952 1953 // This is the Shunting-Yard algorithm loop. 1954 for (; !done && BC_PARSE_EXPR(t); t = p->l.t) 1955 { 1956 switch (t) 1957 { 1958 case BC_LEX_OP_INC: 1959 case BC_LEX_OP_DEC: 1960 { 1961 // These operators can only be used with items that can be 1962 // assigned to. 1963 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1964 1965 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags); 1966 1967 rprn = get_token = bin_last = false; 1968 incdec = true; 1969 flags &= ~(BC_PARSE_ARRAY); 1970 1971 break; 1972 } 1973 1974 #if BC_ENABLE_EXTRA_MATH 1975 case BC_LEX_OP_TRUNC: 1976 { 1977 // The previous token must have been a leaf expression, or the 1978 // operator is in the wrong place. 1979 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn))) 1980 { 1981 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1982 } 1983 1984 // I can just add the instruction because 1985 // negative will already be taken care of. 1986 bc_parse_push(p, BC_INST_TRUNC); 1987 1988 rprn = can_assign = incdec = false; 1989 get_token = true; 1990 flags &= ~(BC_PARSE_ARRAY); 1991 1992 break; 1993 } 1994 #endif // BC_ENABLE_EXTRA_MATH 1995 1996 case BC_LEX_OP_MINUS: 1997 { 1998 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs); 1999 2000 rprn = get_token = can_assign = false; 2001 2002 // This is true if it was a binary operator last. 2003 bin_last = (prev == BC_INST_MINUS); 2004 if (bin_last) incdec = false; 2005 2006 flags &= ~(BC_PARSE_ARRAY); 2007 2008 break; 2009 } 2010 2011 // All of this group, including the fallthrough, is to parse binary 2012 // operators. 2013 case BC_LEX_OP_ASSIGN_POWER: 2014 case BC_LEX_OP_ASSIGN_MULTIPLY: 2015 case BC_LEX_OP_ASSIGN_DIVIDE: 2016 case BC_LEX_OP_ASSIGN_MODULUS: 2017 case BC_LEX_OP_ASSIGN_PLUS: 2018 case BC_LEX_OP_ASSIGN_MINUS: 2019 #if BC_ENABLE_EXTRA_MATH 2020 case BC_LEX_OP_ASSIGN_PLACES: 2021 case BC_LEX_OP_ASSIGN_LSHIFT: 2022 case BC_LEX_OP_ASSIGN_RSHIFT: 2023 #endif // BC_ENABLE_EXTRA_MATH 2024 case BC_LEX_OP_ASSIGN: 2025 { 2026 // We need to make sure the assignment is valid. 2027 if (!BC_PARSE_INST_VAR(prev)) 2028 { 2029 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 2030 } 2031 2032 // Fallthrough. 2033 BC_FALLTHROUGH 2034 } 2035 2036 case BC_LEX_OP_POWER: 2037 case BC_LEX_OP_MULTIPLY: 2038 case BC_LEX_OP_DIVIDE: 2039 case BC_LEX_OP_MODULUS: 2040 case BC_LEX_OP_PLUS: 2041 #if BC_ENABLE_EXTRA_MATH 2042 case BC_LEX_OP_PLACES: 2043 case BC_LEX_OP_LSHIFT: 2044 case BC_LEX_OP_RSHIFT: 2045 #endif // BC_ENABLE_EXTRA_MATH 2046 case BC_LEX_OP_REL_EQ: 2047 case BC_LEX_OP_REL_LE: 2048 case BC_LEX_OP_REL_GE: 2049 case BC_LEX_OP_REL_NE: 2050 case BC_LEX_OP_REL_LT: 2051 case BC_LEX_OP_REL_GT: 2052 case BC_LEX_OP_BOOL_NOT: 2053 case BC_LEX_OP_BOOL_OR: 2054 case BC_LEX_OP_BOOL_AND: 2055 { 2056 // This is true if the operator if the token is a prefix 2057 // operator. This is only for boolean not. 2058 if (BC_PARSE_OP_PREFIX(t)) 2059 { 2060 // Prefix operators are only allowed after binary operators 2061 // or prefix operators. 2062 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last))) 2063 { 2064 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2065 } 2066 } 2067 // If we execute the else, that means we have a binary operator. 2068 // If the previous operator was a prefix or a binary operator, 2069 // then a binary operator is not allowed. 2070 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last)) 2071 { 2072 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2073 } 2074 2075 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT); 2076 prev = BC_PARSE_TOKEN_INST(t); 2077 2078 bc_parse_operator(p, t, ops_bgn, &nexprs); 2079 2080 rprn = incdec = can_assign = false; 2081 get_token = true; 2082 bin_last = !BC_PARSE_OP_PREFIX(t); 2083 flags &= ~(BC_PARSE_ARRAY); 2084 2085 break; 2086 } 2087 2088 case BC_LEX_LPAREN: 2089 { 2090 // A left paren is *not* allowed right after a leaf expr. 2091 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2092 { 2093 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2094 } 2095 2096 nparens += 1; 2097 rprn = incdec = can_assign = false; 2098 get_token = true; 2099 2100 // Push the paren onto the operator stack. 2101 bc_vec_push(&p->ops, &t); 2102 2103 break; 2104 } 2105 2106 case BC_LEX_RPAREN: 2107 { 2108 // This needs to be a status. The error is handled in 2109 // bc_parse_expr_status(). 2110 if (BC_ERR(p->l.last == BC_LEX_LPAREN)) 2111 { 2112 return BC_PARSE_STATUS_EMPTY_EXPR; 2113 } 2114 2115 // The right paren must not come after a prefix or binary 2116 // operator. 2117 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev))) 2118 { 2119 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2120 } 2121 2122 // If there are no parens left, we are done, but we need another 2123 // token. 2124 if (!nparens) 2125 { 2126 done = true; 2127 get_token = false; 2128 break; 2129 } 2130 2131 nparens -= 1; 2132 rprn = true; 2133 get_token = bin_last = incdec = false; 2134 2135 bc_parse_rightParen(p, &nexprs); 2136 2137 break; 2138 } 2139 2140 case BC_LEX_STR: 2141 { 2142 // POSIX only allows strings alone. 2143 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING); 2144 2145 // A string is a leaf and cannot come right after a leaf. 2146 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2147 { 2148 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2149 } 2150 2151 bc_parse_addString(p); 2152 2153 get_token = true; 2154 bin_last = rprn = false; 2155 nexprs += 1; 2156 2157 break; 2158 } 2159 2160 case BC_LEX_NAME: 2161 { 2162 // A name is a leaf and cannot come right after a leaf. 2163 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2164 { 2165 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2166 } 2167 2168 get_token = bin_last = false; 2169 2170 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL); 2171 2172 rprn = (prev == BC_INST_CALL); 2173 nexprs += 1; 2174 flags &= ~(BC_PARSE_ARRAY); 2175 2176 break; 2177 } 2178 2179 case BC_LEX_NUMBER: 2180 { 2181 // A number is a leaf and cannot come right after a leaf. 2182 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2183 { 2184 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2185 } 2186 2187 // The number instruction is pushed in here. 2188 bc_parse_number(p); 2189 2190 nexprs += 1; 2191 prev = BC_INST_NUM; 2192 get_token = true; 2193 rprn = bin_last = can_assign = false; 2194 flags &= ~(BC_PARSE_ARRAY); 2195 2196 break; 2197 } 2198 2199 case BC_LEX_KW_IBASE: 2200 case BC_LEX_KW_LAST: 2201 case BC_LEX_KW_OBASE: 2202 #if BC_ENABLE_EXTRA_MATH 2203 case BC_LEX_KW_SEED: 2204 #endif // BC_ENABLE_EXTRA_MATH 2205 { 2206 // All of these are leaves and cannot come right after a leaf. 2207 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2208 { 2209 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2210 } 2211 2212 prev = t - BC_LEX_KW_LAST + BC_INST_LAST; 2213 bc_parse_push(p, prev); 2214 2215 get_token = can_assign = true; 2216 rprn = bin_last = false; 2217 nexprs += 1; 2218 flags &= ~(BC_PARSE_ARRAY); 2219 2220 break; 2221 } 2222 2223 case BC_LEX_KW_LENGTH: 2224 case BC_LEX_KW_SQRT: 2225 case BC_LEX_KW_ABS: 2226 #if BC_ENABLE_EXTRA_MATH 2227 case BC_LEX_KW_IRAND: 2228 #endif // BC_ENABLE_EXTRA_MATH 2229 case BC_LEX_KW_ASCIIFY: 2230 { 2231 // All of these are leaves and cannot come right after a leaf. 2232 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2233 { 2234 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2235 } 2236 2237 bc_parse_builtin(p, t, flags, &prev); 2238 2239 rprn = get_token = bin_last = incdec = can_assign = false; 2240 nexprs += 1; 2241 flags &= ~(BC_PARSE_ARRAY); 2242 2243 break; 2244 } 2245 2246 case BC_LEX_KW_READ: 2247 #if BC_ENABLE_EXTRA_MATH 2248 case BC_LEX_KW_RAND: 2249 #endif // BC_ENABLE_EXTRA_MATH 2250 case BC_LEX_KW_MAXIBASE: 2251 case BC_LEX_KW_MAXOBASE: 2252 case BC_LEX_KW_MAXSCALE: 2253 #if BC_ENABLE_EXTRA_MATH 2254 case BC_LEX_KW_MAXRAND: 2255 #endif // BC_ENABLE_EXTRA_MATH 2256 case BC_LEX_KW_LINE_LENGTH: 2257 case BC_LEX_KW_GLOBAL_STACKS: 2258 case BC_LEX_KW_LEADING_ZERO: 2259 { 2260 // All of these are leaves and cannot come right after a leaf. 2261 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2262 { 2263 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2264 } 2265 2266 // Error if we have read and it's not allowed. 2267 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD)) 2268 { 2269 bc_parse_err(p, BC_ERR_EXEC_REC_READ); 2270 } 2271 2272 prev = t - BC_LEX_KW_READ + BC_INST_READ; 2273 bc_parse_noArgBuiltin(p, prev); 2274 2275 rprn = get_token = bin_last = incdec = can_assign = false; 2276 nexprs += 1; 2277 flags &= ~(BC_PARSE_ARRAY); 2278 2279 break; 2280 } 2281 2282 case BC_LEX_KW_SCALE: 2283 { 2284 // This is a leaf and cannot come right after a leaf. 2285 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2286 { 2287 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2288 } 2289 2290 // Scale needs special work because it can be a variable *or* a 2291 // function. 2292 bc_parse_scale(p, &prev, &can_assign, flags); 2293 2294 rprn = get_token = bin_last = false; 2295 nexprs += 1; 2296 flags &= ~(BC_PARSE_ARRAY); 2297 2298 break; 2299 } 2300 2301 case BC_LEX_KW_MODEXP: 2302 case BC_LEX_KW_DIVMOD: 2303 { 2304 // This is a leaf and cannot come right after a leaf. 2305 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2306 { 2307 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2308 } 2309 2310 bc_parse_builtin3(p, t, flags, &prev); 2311 2312 rprn = get_token = bin_last = incdec = can_assign = false; 2313 nexprs += 1; 2314 flags &= ~(BC_PARSE_ARRAY); 2315 2316 break; 2317 } 2318 2319 default: 2320 { 2321 #ifndef NDEBUG 2322 // We should never get here, even in debug builds. 2323 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 2324 break; 2325 #endif // NDEBUG 2326 } 2327 } 2328 2329 if (get_token) bc_lex_next(&p->l); 2330 } 2331 2332 // Now that we have parsed the expression, we need to empty the operator 2333 // stack. 2334 while (p->ops.len > ops_bgn) 2335 { 2336 top = BC_PARSE_TOP_OP(p); 2337 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN; 2338 2339 // There should not be *any* parens on the stack anymore. 2340 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN)) 2341 { 2342 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2343 } 2344 2345 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 2346 2347 // Adjust the number of unused expressions. 2348 nexprs -= !BC_PARSE_OP_PREFIX(top); 2349 bc_vec_pop(&p->ops); 2350 2351 incdec = false; 2352 } 2353 2354 // There must be only one expression at the top. 2355 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR); 2356 2357 // Check that the next token is correct. 2358 for (i = 0; i < next.len && t != next.tokens[i]; ++i) 2359 { 2360 continue; 2361 } 2362 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p))) 2363 { 2364 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2365 } 2366 2367 // Check that POSIX would be happy with the number of relational operators. 2368 if (!(flags & BC_PARSE_REL) && nrelops) 2369 { 2370 bc_parse_err(p, BC_ERR_POSIX_REL_POS); 2371 } 2372 else if ((flags & BC_PARSE_REL) && nrelops > 1) 2373 { 2374 bc_parse_err(p, BC_ERR_POSIX_MULTIREL); 2375 } 2376 2377 // If this is true, then we might be in a situation where we don't print. 2378 // We would want to have the increment/decrement operator not make an extra 2379 // copy if it's not necessary. 2380 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) 2381 { 2382 // We have the easy case if the last operator was an assignment 2383 // operator. 2384 if (assign) 2385 { 2386 inst = *((uchar*) bc_vec_top(&p->func->code)); 2387 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER); 2388 incdec = false; 2389 } 2390 // If we have an inc/dec operator and we are *not* printing, implement 2391 // the optimization to get rid of the extra copy. 2392 else if (incdec && !(flags & BC_PARSE_PRINT)) 2393 { 2394 inst = *((uchar*) bc_vec_top(&p->func->code)); 2395 incdec = (inst <= BC_INST_DEC); 2396 inst = BC_INST_ASSIGN_PLUS_NO_VAL + 2397 (inst != BC_INST_INC && inst != BC_INST_ASSIGN_PLUS); 2398 } 2399 2400 // This condition allows us to change the previous assignment 2401 // instruction (which does a copy) for a NO_VAL version, which does not. 2402 // This condition is set if either of the above if statements ends up 2403 // being true. 2404 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL && 2405 inst <= BC_INST_ASSIGN_NO_VAL) 2406 { 2407 // Pop the previous assignment instruction and push a new one. 2408 // Inc/dec needs the extra instruction because it is now a binary 2409 // operator and needs a second operand. 2410 bc_vec_pop(&p->func->code); 2411 if (incdec) bc_parse_push(p, BC_INST_ONE); 2412 bc_parse_push(p, inst); 2413 } 2414 } 2415 2416 // If we might have to print... 2417 if ((flags & BC_PARSE_PRINT)) 2418 { 2419 // With a paren first or the last operator not being an assignment, we 2420 // *do* want to print. 2421 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT); 2422 } 2423 // We need to make sure to push a pop instruction for assignment statements 2424 // that will not print. The print will pop, but without it, we need to pop. 2425 else if (!(flags & BC_PARSE_NEEDVAL) && 2426 (inst < BC_INST_ASSIGN_POWER_NO_VAL || 2427 inst > BC_INST_ASSIGN_NO_VAL)) 2428 { 2429 bc_parse_push(p, BC_INST_POP); 2430 } 2431 2432 // We want to eat newlines if newlines are not a valid ending token. 2433 // This is for spacing in things like for loop headers. 2434 // 2435 // Yes, this is one case where I reuse a variable for a different purpose; 2436 // in this case, incdec being true now means that newlines are not valid. 2437 for (incdec = true, i = 0; i < next.len && incdec; ++i) 2438 { 2439 incdec = (next.tokens[i] != BC_LEX_NLINE); 2440 } 2441 if (incdec) 2442 { 2443 while (p->l.t == BC_LEX_NLINE) 2444 { 2445 bc_lex_next(&p->l); 2446 } 2447 } 2448 2449 return BC_PARSE_STATUS_SUCCESS; 2450 } 2451 2452 /** 2453 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets 2454 * an empty expression. 2455 * @param p The parser. 2456 * @param flags The flags for what is valid in the expression. 2457 * @param next A set of tokens for what is valid *after* the expression. 2458 */ 2459 static void 2460 bc_parse_expr_status(BcParse* p, uint8_t flags, BcParseNext next) 2461 { 2462 BcParseStatus s = bc_parse_expr_err(p, flags, next); 2463 2464 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR)) 2465 { 2466 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR); 2467 } 2468 } 2469 2470 void 2471 bc_parse_expr(BcParse* p, uint8_t flags) 2472 { 2473 assert(p); 2474 bc_parse_expr_status(p, flags, bc_parse_next_read); 2475 } 2476 #endif // BC_ENABLED 2477