1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * The parser for bc. 33 * 34 */ 35 36 #if BC_ENABLED 37 38 #include <assert.h> 39 #include <stdbool.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include <setjmp.h> 44 45 #include <bc.h> 46 #include <num.h> 47 #include <vm.h> 48 49 // Before you embark on trying to understand this code, have you read the 50 // Development manual (manuals/development.md) and the comment in include/bc.h 51 // yet? No? Do that first. I'm serious. 52 // 53 // The reason is because this file holds the most sensitive and finicky code in 54 // the entire codebase. Even getting history to work on Windows was nothing 55 // compared to this. This is where dreams go to die, where dragons live, and 56 // from which Ken Thompson himself would flee. 57 58 static void bc_parse_else(BcParse *p); 59 static void bc_parse_stmt(BcParse *p); 60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 61 BcParseNext next); 62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next); 63 64 /** 65 * Returns true if an instruction could only have come from a "leaf" expression. 66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF(). 67 * @param t The instruction to test. 68 */ 69 static bool bc_parse_inst_isLeaf(BcInst t) { 70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) || 71 #if BC_ENABLE_EXTRA_MATH 72 t == BC_INST_TRUNC || 73 #endif // BC_ENABLE_EXTRA_MATH 74 t <= BC_INST_DEC; 75 } 76 77 /** 78 * Returns true if the *previous* token was a delimiter. A delimiter is anything 79 * that can legally end a statement. In bc's case, it could be a newline, a 80 * semicolon, and a brace in certain cases. 81 * @param p The parser. 82 * @return True if the token is a legal delimiter. 83 */ 84 static bool bc_parse_isDelimiter(const BcParse *p) { 85 86 BcLexType t = p->l.t; 87 bool good; 88 89 // If it's an obvious delimiter, say so. 90 if (BC_PARSE_DELIMITER(t)) return true; 91 92 good = false; 93 94 // If the current token is a keyword, then...beware. That means that we need 95 // to check for a "dangling" else, where there was no brace-delimited block 96 // on the previous if. 97 if (t == BC_LEX_KW_ELSE) { 98 99 size_t i; 100 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE; 101 102 // As long as going up the stack is valid for a dangling else, keep on. 103 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) { 104 105 fptr = bc_vec_item_rev(&p->flags, i); 106 flags = *fptr; 107 108 // If we need a brace and don't have one, then we don't have a 109 // delimiter. 110 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE) 111 return false; 112 } 113 114 // Oh, and we had also better have an if statement somewhere. 115 good = ((flags & BC_PARSE_FLAG_IF) != 0); 116 } 117 else if (t == BC_LEX_RBRACE) { 118 119 size_t i; 120 121 // Since we have a brace, we need to just check if a brace was needed. 122 for (i = 0; !good && i < p->flags.len; ++i) { 123 uint16_t *fptr = bc_vec_item_rev(&p->flags, i); 124 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0); 125 } 126 } 127 128 return good; 129 } 130 131 /** 132 * Returns true if we are in top level of a function body. The POSIX grammar 133 * is defined such that anything is allowed after a function body, so we must 134 * use this function to detect that case when ending a function body. 135 * @param p The parser. 136 * @return True if we are in the top level of parsing a function body. 137 */ 138 static bool bc_parse_TopFunc(const BcParse *p) { 139 140 bool good = p->flags.len == 2; 141 142 uint16_t val = BC_PARSE_FLAG_BRACE | BC_PARSE_FLAG_FUNC_INNER; 143 val |= BC_PARSE_FLAG_FUNC; 144 145 return good && BC_PARSE_TOP_FLAG(p) == val; 146 } 147 148 /** 149 * Sets a previously defined exit label. What are labels? See the bc Parsing 150 * section of the Development manual (manuals/development.md). 151 * @param p The parser. 152 */ 153 static void bc_parse_setLabel(BcParse *p) { 154 155 BcFunc *func = p->func; 156 BcInstPtr *ip = bc_vec_top(&p->exits); 157 size_t *label; 158 159 assert(func == bc_vec_item(&p->prog->fns, p->fidx)); 160 161 // Set the preallocated label to the correct index. 162 label = bc_vec_item(&func->labels, ip->idx); 163 *label = func->code.len; 164 165 // Now, we don't need the exit label; it is done. 166 bc_vec_pop(&p->exits); 167 } 168 169 /** 170 * Creates a label and sets it to idx. If this is an exit label, then idx is 171 * actually invalid, but it doesn't matter because it will be fixed by 172 * bc_parse_setLabel() later. 173 * @param p The parser. 174 * @param idx The index of the label. 175 */ 176 static void bc_parse_createLabel(BcParse *p, size_t idx) { 177 bc_vec_push(&p->func->labels, &idx); 178 } 179 180 /** 181 * Creates a conditional label. Unlike an exit label, this label is set at 182 * creation time because it comes *before* the code that will target it. 183 * @param p The parser. 184 * @param idx The index of the label. 185 */ 186 static void bc_parse_createCondLabel(BcParse *p, size_t idx) { 187 bc_parse_createLabel(p, p->func->code.len); 188 bc_vec_push(&p->conds, &idx); 189 } 190 191 /* 192 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why 193 * create a label to be filled in later? Because exit labels are meant to be 194 * targeted by code that comes *before* the label. Since we have to parse that 195 * code first, and don't know how long it will be, we need to just make sure to 196 * reserve a slot to be filled in later when we know. 197 * 198 * By the way, this uses BcInstPtr because it was convenient. The field idx 199 * holds the index, and the field func holds the loop boolean. 200 * 201 * @param p The parser. 202 * @param idx The index of the label's position. 203 * @param loop True if the exit label is for a loop or not. 204 */ 205 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) { 206 207 BcInstPtr ip; 208 209 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx)); 210 211 ip.func = loop; 212 ip.idx = idx; 213 ip.len = 0; 214 215 bc_vec_push(&p->exits, &ip); 216 bc_parse_createLabel(p, SIZE_MAX); 217 } 218 219 /** 220 * Pops the correct operators off of the operator stack based on the current 221 * operator. This is because of the Shunting-Yard algorithm. Lower prec means 222 * higher precedence. 223 * @param p The parser. 224 * @param type The operator. 225 * @param start The previous start of the operator stack. For more 226 * information, see the bc Parsing section of the Development 227 * manual (manuals/development.md). 228 * @param nexprs A pointer to the current number of expressions that have not 229 * been consumed yet. This is an IN and OUT parameter. 230 */ 231 static void bc_parse_operator(BcParse *p, BcLexType type, 232 size_t start, size_t *nexprs) 233 { 234 BcLexType t; 235 uchar l, r = BC_PARSE_OP_PREC(type); 236 uchar left = BC_PARSE_OP_LEFT(type); 237 238 // While we haven't hit the stop point yet. 239 while (p->ops.len > start) { 240 241 // Get the top operator. 242 t = BC_PARSE_TOP_OP(p); 243 244 // If it's a right paren, we have reached the end of whatever expression 245 // this is no matter what. 246 if (t == BC_LEX_LPAREN) break; 247 248 // Break for precedence. Precedence operates differently on left and 249 // right associativity, by the way. A left associative operator that 250 // matches the current precedence should take priority, but a right 251 // associative operator should not. 252 l = BC_PARSE_OP_PREC(t); 253 if (l >= r && (l != r || !left)) break; 254 255 // Do the housekeeping. In particular, make sure to note that one 256 // expression was consumed. (Two were, but another was added.) 257 bc_parse_push(p, BC_PARSE_TOKEN_INST(t)); 258 bc_vec_pop(&p->ops); 259 *nexprs -= !BC_PARSE_OP_PREFIX(t); 260 } 261 262 bc_vec_push(&p->ops, &type); 263 } 264 265 /** 266 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on 267 * the operator stack. But before that, it needs to consume whatever operators 268 * there are until it hits a left paren. 269 * @param p The parser. 270 * @param nexprs A pointer to the current number of expressions that have not 271 * been consumed yet. This is an IN and OUT parameter. 272 */ 273 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) { 274 275 BcLexType top; 276 277 // Consume operators until a left paren. 278 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) { 279 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 280 bc_vec_pop(&p->ops); 281 *nexprs -= !BC_PARSE_OP_PREFIX(top); 282 } 283 284 // We need to pop the left paren as well. 285 bc_vec_pop(&p->ops); 286 287 // Oh, and we also want the next token. 288 bc_lex_next(&p->l); 289 } 290 291 /** 292 * Parses function arguments. 293 * @param p The parser. 294 * @param flags Flags restricting what kind of expressions the arguments can 295 * be. 296 */ 297 static void bc_parse_args(BcParse *p, uint8_t flags) { 298 299 bool comma = false; 300 size_t nargs; 301 302 bc_lex_next(&p->l); 303 304 // Print and comparison operators not allowed. Well, comparison operators 305 // only for POSIX. But we do allow arrays, and we *must* get a value. 306 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 307 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL); 308 309 // Count the arguments and parse them. 310 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) { 311 312 bc_parse_expr_status(p, flags, bc_parse_next_arg); 313 314 comma = (p->l.t == BC_LEX_COMMA); 315 if (comma) bc_lex_next(&p->l); 316 } 317 318 // An ending comma is FAIL. 319 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 320 321 // Now do the call with the number of arguments. 322 bc_parse_push(p, BC_INST_CALL); 323 bc_parse_pushIndex(p, nargs); 324 } 325 326 /** 327 * Parses a function call. 328 * @param p The parser. 329 * @param flags Flags restricting what kind of expressions the arguments can 330 * be. 331 */ 332 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) { 333 334 size_t idx; 335 336 bc_parse_args(p, flags); 337 338 // We just assert this because bc_parse_args() should 339 // ensure that the next token is what it should be. 340 assert(p->l.t == BC_LEX_RPAREN); 341 342 // We cannot use bc_program_insertFunc() here 343 // because it will overwrite an existing function. 344 idx = bc_map_index(&p->prog->fn_map, name); 345 346 // The function does not exist yet. Create a space for it. If the user does 347 // not define it, it's a *runtime* error, not a parse error. 348 if (idx == BC_VEC_INVALID_IDX) { 349 350 idx = bc_program_insertFunc(p->prog, name); 351 352 assert(idx != BC_VEC_INVALID_IDX); 353 354 // Make sure that this pointer was not invalidated. 355 p->func = bc_vec_item(&p->prog->fns, p->fidx); 356 } 357 // The function exists, so set the right function index. 358 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx; 359 360 bc_parse_pushIndex(p, idx); 361 362 // Make sure to get the next token. 363 bc_lex_next(&p->l); 364 } 365 366 /** 367 * Parses a name/identifier-based expression. It could be a variable, an array 368 * element, an array itself (for function arguments), a function call, etc. 369 * 370 */ 371 static void bc_parse_name(BcParse *p, BcInst *type, 372 bool *can_assign, uint8_t flags) 373 { 374 char *name; 375 376 BC_SIG_ASSERT_LOCKED; 377 378 // We want a copy of the name since the lexer might overwrite its copy. 379 name = bc_vm_strdup(p->l.str.v); 380 381 BC_SETJMP_LOCKED(err); 382 383 // We need the next token to see if it's just a variable or something more. 384 bc_lex_next(&p->l); 385 386 // Array element or array. 387 if (p->l.t == BC_LEX_LBRACKET) { 388 389 bc_lex_next(&p->l); 390 391 // Array only. This has to be a function parameter. 392 if (p->l.t == BC_LEX_RBRACKET) { 393 394 // Error if arrays are not allowed. 395 if (BC_ERR(!(flags & BC_PARSE_ARRAY))) 396 bc_parse_err(p, BC_ERR_PARSE_EXPR); 397 398 *type = BC_INST_ARRAY; 399 *can_assign = false; 400 } 401 else { 402 403 // If we are here, we have an array element. We need to set the 404 // expression parsing flags. 405 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) | 406 BC_PARSE_NEEDVAL; 407 408 bc_parse_expr_status(p, flags2, bc_parse_next_elem); 409 410 // The next token *must* be a right bracket. 411 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 412 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 413 414 *type = BC_INST_ARRAY_ELEM; 415 *can_assign = true; 416 } 417 418 // Make sure to get the next token. 419 bc_lex_next(&p->l); 420 421 // Push the instruction and the name of the identifier. 422 bc_parse_push(p, *type); 423 bc_parse_pushName(p, name, false); 424 } 425 else if (p->l.t == BC_LEX_LPAREN) { 426 427 // We are parsing a function call; error if not allowed. 428 if (BC_ERR(flags & BC_PARSE_NOCALL)) 429 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 430 431 *type = BC_INST_CALL; 432 *can_assign = false; 433 434 bc_parse_call(p, name, flags); 435 } 436 else { 437 // Just a variable. 438 *type = BC_INST_VAR; 439 *can_assign = true; 440 bc_parse_push(p, BC_INST_VAR); 441 bc_parse_pushName(p, name, true); 442 } 443 444 err: 445 // Need to make sure to unallocate the name. 446 free(name); 447 BC_LONGJMP_CONT; 448 BC_SIG_MAYLOCK; 449 } 450 451 /** 452 * Parses a builtin function that takes no arguments. This includes read(), 453 * rand(), maxibase(), maxobase(), maxscale(), and maxrand(). 454 * @param p The parser. 455 * @param inst The instruction corresponding to the builtin. 456 */ 457 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) { 458 459 // Must have a left paren. 460 bc_lex_next(&p->l); 461 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 462 463 // Must have a right paren. 464 bc_lex_next(&p->l); 465 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 466 467 bc_parse_push(p, inst); 468 469 bc_lex_next(&p->l); 470 } 471 472 /** 473 * Parses a builtin function that takes 1 argument. This includes length(), 474 * sqrt(), abs(), scale(), and irand(). 475 * @param p The parser. 476 * @param type The lex token. 477 * @param flags The expression parsing flags for parsing the argument. 478 * @param prev An out parameter; the previous instruction pointer. 479 */ 480 static void bc_parse_builtin(BcParse *p, BcLexType type, 481 uint8_t flags, BcInst *prev) 482 { 483 // Must have a left paren. 484 bc_lex_next(&p->l); 485 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 486 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 487 488 bc_lex_next(&p->l); 489 490 // Change the flags as needed for parsing the argument. 491 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 492 flags |= BC_PARSE_NEEDVAL; 493 494 // Since length can take arrays, we need to specially add that flag. 495 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY; 496 497 bc_parse_expr_status(p, flags, bc_parse_next_rel); 498 499 // Must have a right paren. 500 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 501 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 502 503 // Adjust previous based on the token and push it. 504 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH; 505 bc_parse_push(p, *prev); 506 507 bc_lex_next(&p->l); 508 } 509 510 /** 511 * Parses a builtin function that takes 3 arguments. This includes modexp() and 512 * divmod(). 513 */ 514 static void bc_parse_builtin3(BcParse *p, BcLexType type, 515 uint8_t flags, BcInst *prev) 516 { 517 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD); 518 519 // Must have a left paren. 520 bc_lex_next(&p->l); 521 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 522 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 523 524 bc_lex_next(&p->l); 525 526 // Change the flags as needed for parsing the argument. 527 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 528 flags |= BC_PARSE_NEEDVAL; 529 530 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 531 532 // Must have a comma. 533 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 534 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 535 536 bc_lex_next(&p->l); 537 538 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 539 540 // Must have a comma. 541 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 542 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 543 544 bc_lex_next(&p->l); 545 546 // If it is a divmod, parse an array name. Otherwise, just parse another 547 // expression. 548 if (type == BC_LEX_KW_DIVMOD) { 549 550 // Must have a name. 551 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 552 553 // This is safe because the next token should not overwrite the name. 554 bc_lex_next(&p->l); 555 556 // Must have a left bracket. 557 if (BC_ERR(p->l.t != BC_LEX_LBRACKET)) 558 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 559 560 // This is safe because the next token should not overwrite the name. 561 bc_lex_next(&p->l); 562 563 // Must have a right bracket. 564 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 565 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 566 567 // This is safe because the next token should not overwrite the name. 568 bc_lex_next(&p->l); 569 } 570 else bc_parse_expr_status(p, flags, bc_parse_next_rel); 571 572 // Must have a right paren. 573 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 574 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 575 576 // Adjust previous based on the token and push it. 577 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP; 578 bc_parse_push(p, *prev); 579 580 // If we have divmod, we need to assign the modulus to the array element, so 581 // we need to push the instructions for doing so. 582 if (type == BC_LEX_KW_DIVMOD) { 583 584 // The zeroth element. 585 bc_parse_push(p, BC_INST_ZERO); 586 bc_parse_push(p, BC_INST_ARRAY_ELEM); 587 588 // Push the array. 589 bc_parse_pushName(p, p->l.str.v, false); 590 591 // Swap them and assign. After this, the top item on the stack should 592 // be the quotient. 593 bc_parse_push(p, BC_INST_SWAP); 594 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL); 595 } 596 597 bc_lex_next(&p->l); 598 } 599 600 /** 601 * Parses the scale keyword. This is special because scale can be a value or a 602 * builtin function. 603 * @param p The parser. 604 * @param type An out parameter; the instruction for the parse. 605 * @param can_assign An out parameter; whether the expression can be assigned 606 * to. 607 * @param flags The expression parsing flags for parsing a scale() arg. 608 */ 609 static void bc_parse_scale(BcParse *p, BcInst *type, 610 bool *can_assign, uint8_t flags) 611 { 612 bc_lex_next(&p->l); 613 614 // Without the left paren, it's just the keyword. 615 if (p->l.t != BC_LEX_LPAREN) { 616 617 // Set, push, and return. 618 *type = BC_INST_SCALE; 619 *can_assign = true; 620 bc_parse_push(p, BC_INST_SCALE); 621 return; 622 } 623 624 // Handle the scale function. 625 *type = BC_INST_SCALE_FUNC; 626 *can_assign = false; 627 628 // Once again, adjust the flags. 629 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 630 flags |= BC_PARSE_NEEDVAL; 631 632 bc_lex_next(&p->l); 633 634 bc_parse_expr_status(p, flags, bc_parse_next_rel); 635 636 // Must have a right paren. 637 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 638 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 639 640 bc_parse_push(p, BC_INST_SCALE_FUNC); 641 642 bc_lex_next(&p->l); 643 } 644 645 /** 646 * Parses and increment or decrement operator. This is a bit complex. 647 * @param p The parser. 648 * @param prev An out parameter; the previous instruction pointer. 649 * @param can_assign An out parameter; whether the expression can be assigned 650 * to. 651 * @param nexs An in/out parameter; the number of expressions in the 652 * parse tree that are not used. 653 * @param flags The expression parsing flags for parsing a scale() arg. 654 */ 655 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign, 656 size_t *nexs, uint8_t flags) 657 { 658 BcLexType type; 659 uchar inst; 660 BcInst etype = *prev; 661 BcLexType last = p->l.last; 662 663 assert(prev != NULL && can_assign != NULL); 664 665 // If we can't assign to the previous token, then we have an error. 666 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC || 667 last == BC_LEX_RPAREN)) 668 { 669 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 670 } 671 672 // Is the previous instruction for a variable? 673 if (BC_PARSE_INST_VAR(etype)) { 674 675 // If so, this is a postfix operator. 676 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 677 678 // Only postfix uses BC_INST_INC and BC_INST_DEC. 679 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC); 680 bc_parse_push(p, inst); 681 bc_lex_next(&p->l); 682 *can_assign = false; 683 } 684 else { 685 686 // This is a prefix operator. In that case, we just convert it to 687 // an assignment instruction. 688 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC); 689 690 bc_lex_next(&p->l); 691 type = p->l.t; 692 693 // Because we parse the next part of the expression 694 // right here, we need to increment this. 695 *nexs = *nexs + 1; 696 697 // Is the next token a normal identifier? 698 if (type == BC_LEX_NAME) { 699 700 // Parse the name. 701 uint8_t flags2 = flags & ~BC_PARSE_ARRAY; 702 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL); 703 } 704 // Is the next token a global? 705 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) { 706 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST); 707 bc_lex_next(&p->l); 708 } 709 // Is the next token specifically scale, which needs special treatment? 710 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) { 711 712 bc_lex_next(&p->l); 713 714 // Check that scale() was not used. 715 if (BC_ERR(p->l.t == BC_LEX_LPAREN)) 716 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 717 else bc_parse_push(p, BC_INST_SCALE); 718 } 719 // Now we know we have an error. 720 else bc_parse_err(p, BC_ERR_PARSE_TOKEN); 721 722 *can_assign = false; 723 724 bc_parse_push(p, BC_INST_ONE); 725 bc_parse_push(p, inst); 726 } 727 } 728 729 /** 730 * Parses the minus operator. This needs special treatment because it is either 731 * subtract or negation. 732 * @param p The parser. 733 * @param prev An in/out parameter; the previous instruction. 734 * @param ops_bgn The size of the operator stack. 735 * @param rparen True if the last token was a right paren. 736 * @param binlast True if the last token was a binary operator. 737 * @param nexprs An in/out parameter; the number of unused expressions. 738 */ 739 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn, 740 bool rparen, bool binlast, size_t *nexprs) 741 { 742 BcLexType type; 743 744 bc_lex_next(&p->l); 745 746 // Figure out if it's a minus or a negation. 747 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG; 748 *prev = BC_PARSE_TOKEN_INST(type); 749 750 // We can just push onto the op stack because this is the largest 751 // precedence operator that gets pushed. Inc/dec does not. 752 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type); 753 else bc_parse_operator(p, type, ops_bgn, nexprs); 754 } 755 756 /** 757 * Parses a string. 758 * @param p The parser. 759 * @param inst The instruction corresponding to how the string was found and 760 * how it should be printed. 761 */ 762 static void bc_parse_str(BcParse *p, BcInst inst) { 763 bc_parse_addString(p); 764 bc_parse_push(p, inst); 765 bc_lex_next(&p->l); 766 } 767 768 /** 769 * Parses a print statement. 770 * @param p The parser. 771 */ 772 static void bc_parse_print(BcParse *p, BcLexType type) { 773 774 BcLexType t; 775 bool comma = false; 776 BcInst inst = type == BC_LEX_KW_STREAM ? 777 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP; 778 779 bc_lex_next(&p->l); 780 781 t = p->l.t; 782 783 // A print or stream statement has to have *something*. 784 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT); 785 786 do { 787 788 // If the token is a string, then print it with escapes. 789 // BC_INST_PRINT_POP plays that role for bc. 790 if (t == BC_LEX_STR) bc_parse_str(p, inst); 791 else { 792 // We have an actual number; parse and add a print instruction. 793 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print); 794 bc_parse_push(p, inst); 795 } 796 797 // Is the next token a comma? 798 comma = (p->l.t == BC_LEX_COMMA); 799 800 // Get the next token if we have a comma. 801 if (comma) bc_lex_next(&p->l); 802 else { 803 804 // If we don't have a comma, the statement needs to end. 805 if (!bc_parse_isDelimiter(p)) 806 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 807 else break; 808 } 809 810 t = p->l.t; 811 812 } while (true); 813 814 // If we have a comma but no token, that's bad. 815 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 816 } 817 818 /** 819 * Parses a return statement. 820 * @param p The parser. 821 */ 822 static void bc_parse_return(BcParse *p) { 823 824 BcLexType t; 825 bool paren; 826 uchar inst = BC_INST_RET0; 827 828 // If we are not in a function, that's an error. 829 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 830 831 // If we are in a void function, make sure to return void. 832 if (p->func->voidfn) inst = BC_INST_RET_VOID; 833 834 bc_lex_next(&p->l); 835 836 t = p->l.t; 837 paren = (t == BC_LEX_LPAREN); 838 839 // An empty return statement just needs to push the selected instruction. 840 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst); 841 else { 842 843 BcParseStatus s; 844 845 // Need to parse the expression whose value will be returned. 846 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr); 847 848 // If the expression was empty, just push the selected instruction. 849 if (s == BC_PARSE_STATUS_EMPTY_EXPR) { 850 bc_parse_push(p, inst); 851 bc_lex_next(&p->l); 852 } 853 854 // POSIX requires parentheses. 855 if (!paren || p->l.last != BC_LEX_RPAREN) { 856 bc_parse_err(p, BC_ERR_POSIX_RET); 857 } 858 859 // Void functions require an empty expression. 860 if (BC_ERR(p->func->voidfn)) { 861 if (s != BC_PARSE_STATUS_EMPTY_EXPR) 862 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name); 863 } 864 // If we got here, we want to be sure to end the function with a real 865 // return instruction, just in case. 866 else bc_parse_push(p, BC_INST_RET); 867 } 868 } 869 870 /** 871 * Clears flags that indicate the end of an if statement and its block and sets 872 * the jump location. 873 * @param p The parser. 874 */ 875 static void bc_parse_noElse(BcParse *p) { 876 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 877 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END)); 878 bc_parse_setLabel(p); 879 } 880 881 /** 882 * Ends (finishes parsing) the body of a control statement or a function. 883 * @param p The parser. 884 * @param brace True if the body was ended by a brace, false otherwise. 885 */ 886 static void bc_parse_endBody(BcParse *p, bool brace) { 887 888 bool has_brace, new_else = false; 889 890 // We cannot be ending a body if there are no bodies to end. 891 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 892 893 if (brace) { 894 895 // The brace was already gotten; make sure that the caller did not lie. 896 // We check for the requirement of braces later. 897 assert(p->l.t == BC_LEX_RBRACE); 898 899 bc_lex_next(&p->l); 900 901 // If the next token is not a delimiter, that is a problem. 902 if (BC_ERR(!bc_parse_isDelimiter(p) && !bc_parse_TopFunc(p))) 903 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 904 } 905 906 // Do we have a brace flag? 907 has_brace = (BC_PARSE_BRACE(p) != 0); 908 909 do { 910 size_t len = p->flags.len; 911 bool loop; 912 913 // If we have a brace flag but not a brace, that's a problem. 914 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 915 916 // Are we inside a loop? 917 loop = (BC_PARSE_LOOP_INNER(p) != 0); 918 919 // If we are ending a loop or an else... 920 if (loop || BC_PARSE_ELSE(p)) { 921 922 // Loops have condition labels that we have to take care of as well. 923 if (loop) { 924 925 size_t *label = bc_vec_top(&p->conds); 926 927 bc_parse_push(p, BC_INST_JUMP); 928 bc_parse_pushIndex(p, *label); 929 930 bc_vec_pop(&p->conds); 931 } 932 933 bc_parse_setLabel(p); 934 bc_vec_pop(&p->flags); 935 } 936 // If we are ending a function... 937 else if (BC_PARSE_FUNC_INNER(p)) { 938 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0); 939 bc_parse_push(p, inst); 940 bc_parse_updateFunc(p, BC_PROG_MAIN); 941 bc_vec_pop(&p->flags); 942 } 943 // If we have a brace flag and not an if statement, we can pop the top 944 // of the flags stack because they have been taken care of above. 945 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags); 946 947 // This needs to be last to parse nested if's properly. 948 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) { 949 950 // Eat newlines. 951 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 952 953 // *Now* we can pop the flags. 954 bc_vec_pop(&p->flags); 955 956 // If we are allowed non-POSIX stuff... 957 if (!BC_S) { 958 959 // Have we found yet another dangling else? 960 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END; 961 new_else = (p->l.t == BC_LEX_KW_ELSE); 962 963 // Parse the else or end the if statement body. 964 if (new_else) bc_parse_else(p); 965 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace)) 966 bc_parse_noElse(p); 967 } 968 // POSIX requires us to do the bare minimum only. 969 else bc_parse_noElse(p); 970 } 971 972 // If these are both true, we have "used" the braces that we found. 973 if (brace && has_brace) brace = false; 974 975 // This condition was perhaps the hardest single part of the parser. If the 976 // flags stack does not have enough, we should stop. If we have a new else 977 // statement, we should stop. If we do have the end of an if statement and 978 // we have eaten the brace, we should stop. If we do have a brace flag, we 979 // should stop. 980 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) && 981 !(has_brace = (BC_PARSE_BRACE(p) != 0))); 982 983 // If we have a brace, yet no body for it, that's a problem. 984 if (BC_ERR(p->flags.len == 1 && brace)) 985 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 986 else if (brace && BC_PARSE_BRACE(p)) { 987 988 // If we make it here, we have a brace and a flag for it. 989 uint16_t flags = BC_PARSE_TOP_FLAG(p); 990 991 // This condition ensure that the *last* body is correctly finished by 992 // popping its flags. 993 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) && 994 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) && 995 !(flags & (BC_PARSE_FLAG_IF_END))) 996 { 997 bc_vec_pop(&p->flags); 998 } 999 } 1000 } 1001 1002 /** 1003 * Starts the body of a control statement or function. 1004 * @param p The parser. 1005 * @param flags The current flags (will be edited). 1006 */ 1007 static void bc_parse_startBody(BcParse *p, uint16_t flags) { 1008 assert(flags); 1009 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP)); 1010 flags |= BC_PARSE_FLAG_BODY; 1011 bc_vec_push(&p->flags, &flags); 1012 } 1013 1014 void bc_parse_endif(BcParse *p) { 1015 1016 size_t i; 1017 bool good; 1018 1019 // Not a problem if this is true. 1020 if (BC_NO_ERR(!BC_PARSE_NO_EXEC(p))) return; 1021 1022 good = true; 1023 1024 // Find an instance of a body that needs closing, i.e., a statement that did 1025 // not have a right brace when it should have. 1026 for (i = 0; good && i < p->flags.len; ++i) { 1027 uint16_t flag = *((uint16_t*) bc_vec_item(&p->flags, i)); 1028 good = ((flag & BC_PARSE_FLAG_BRACE) != BC_PARSE_FLAG_BRACE); 1029 } 1030 1031 // If we did not find such an instance... 1032 if (good) { 1033 1034 // We set this to restore it later. We don't want the parser thinking 1035 // that we are on stdin for this one because it will want more. 1036 bool is_stdin = vm.is_stdin; 1037 1038 vm.is_stdin = false; 1039 1040 // End all of the if statements and loops. 1041 while (p->flags.len > 1 || BC_PARSE_IF_END(p)) { 1042 if (BC_PARSE_IF_END(p)) bc_parse_noElse(p); 1043 if (p->flags.len > 1) bc_parse_endBody(p, false); 1044 } 1045 1046 vm.is_stdin = is_stdin; 1047 } 1048 // If we reach here, a block was not properly closed, and we should error. 1049 else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK); 1050 } 1051 1052 /** 1053 * Parses an if statement. 1054 * @param p The parser. 1055 */ 1056 static void bc_parse_if(BcParse *p) { 1057 1058 // We are allowed relational operators, and we must have a value. 1059 size_t idx; 1060 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1061 1062 // Get the left paren and barf if necessary. 1063 bc_lex_next(&p->l); 1064 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1065 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1066 1067 // Parse the condition. 1068 bc_lex_next(&p->l); 1069 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1070 1071 // Must have a right paren. 1072 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1073 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1074 1075 bc_lex_next(&p->l); 1076 1077 // Insert the conditional jump instruction. 1078 bc_parse_push(p, BC_INST_JUMP_ZERO); 1079 1080 idx = p->func->labels.len; 1081 1082 // Push the index for the instruction and create an exit label for an else 1083 // statement. 1084 bc_parse_pushIndex(p, idx); 1085 bc_parse_createExitLabel(p, idx, false); 1086 1087 bc_parse_startBody(p, BC_PARSE_FLAG_IF); 1088 } 1089 1090 /** 1091 * Parses an else statement. 1092 * @param p The parser. 1093 */ 1094 static void bc_parse_else(BcParse *p) { 1095 1096 size_t idx = p->func->labels.len; 1097 1098 // We must be at the end of an if statement. 1099 if (BC_ERR(!BC_PARSE_IF_END(p))) 1100 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1101 1102 // Push an unconditional jump to make bc jump over the else statement if it 1103 // executed the original if statement. 1104 bc_parse_push(p, BC_INST_JUMP); 1105 bc_parse_pushIndex(p, idx); 1106 1107 // Clear the else stuff. Yes, that function is misnamed for its use here, 1108 // but deal with it. 1109 bc_parse_noElse(p); 1110 1111 // Create the exit label and parse the body. 1112 bc_parse_createExitLabel(p, idx, false); 1113 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE); 1114 1115 bc_lex_next(&p->l); 1116 } 1117 1118 /** 1119 * Parse a while loop. 1120 * @param p The parser. 1121 */ 1122 static void bc_parse_while(BcParse *p) { 1123 1124 // We are allowed relational operators, and we must have a value. 1125 size_t idx; 1126 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1127 1128 // Get the left paren and barf if necessary. 1129 bc_lex_next(&p->l); 1130 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1131 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1132 bc_lex_next(&p->l); 1133 1134 // Create the labels. Loops need both. 1135 bc_parse_createCondLabel(p, p->func->labels.len); 1136 idx = p->func->labels.len; 1137 bc_parse_createExitLabel(p, idx, true); 1138 1139 // Parse the actual condition and barf on non-right paren. 1140 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1141 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1142 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1143 bc_lex_next(&p->l); 1144 1145 // Now we can push the conditional jump and start the body. 1146 bc_parse_push(p, BC_INST_JUMP_ZERO); 1147 bc_parse_pushIndex(p, idx); 1148 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1149 } 1150 1151 /** 1152 * Parse a for loop. 1153 * @param p The parser. 1154 */ 1155 static void bc_parse_for(BcParse *p) { 1156 1157 size_t cond_idx, exit_idx, body_idx, update_idx; 1158 1159 // Barf on the missing left paren. 1160 bc_lex_next(&p->l); 1161 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1162 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1163 bc_lex_next(&p->l); 1164 1165 // The first statement can be empty, but if it is, check for error in POSIX 1166 // mode. Otherwise, parse it. 1167 if (p->l.t != BC_LEX_SCOLON) 1168 bc_parse_expr_status(p, 0, bc_parse_next_for); 1169 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1170 1171 // Must have a semicolon. 1172 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1173 bc_lex_next(&p->l); 1174 1175 // These are indices for labels. There are so many of them because the end 1176 // of the loop must unconditionally jump to the update code. Then the update 1177 // code must unconditionally jump to the condition code. Then the condition 1178 // code must *conditionally* jump to the exit. 1179 cond_idx = p->func->labels.len; 1180 update_idx = cond_idx + 1; 1181 body_idx = update_idx + 1; 1182 exit_idx = body_idx + 1; 1183 1184 // This creates the condition label. 1185 bc_parse_createLabel(p, p->func->code.len); 1186 1187 // Parse an expression if it exists. 1188 if (p->l.t != BC_LEX_SCOLON) { 1189 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1190 bc_parse_expr_status(p, flags, bc_parse_next_for); 1191 } 1192 else { 1193 1194 // Set this for the next call to bc_parse_number because an empty 1195 // condition means that it is an infinite loop, so the condition must be 1196 // non-zero. This is safe to set because the current token is a 1197 // semicolon, which has no string requirement. 1198 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one); 1199 bc_parse_number(p); 1200 1201 // An empty condition makes POSIX mad. 1202 bc_parse_err(p, BC_ERR_POSIX_FOR); 1203 } 1204 1205 // Must have a semicolon. 1206 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) 1207 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1208 bc_lex_next(&p->l); 1209 1210 // Now we can set up the conditional jump to the exit and an unconditional 1211 // jump to the body right after. The unconditional jump to the body is 1212 // because there is update code coming right after the condition, so we need 1213 // to skip it to get to the body. 1214 bc_parse_push(p, BC_INST_JUMP_ZERO); 1215 bc_parse_pushIndex(p, exit_idx); 1216 bc_parse_push(p, BC_INST_JUMP); 1217 bc_parse_pushIndex(p, body_idx); 1218 1219 // Now create the label for the update code. 1220 bc_parse_createCondLabel(p, update_idx); 1221 1222 // Parse if not empty, and if it is, let POSIX yell if necessary. 1223 if (p->l.t != BC_LEX_RPAREN) 1224 bc_parse_expr_status(p, 0, bc_parse_next_rel); 1225 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1226 1227 // Must have a right paren. 1228 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1229 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1230 1231 // Set up a jump to the condition right after the update code. 1232 bc_parse_push(p, BC_INST_JUMP); 1233 bc_parse_pushIndex(p, cond_idx); 1234 bc_parse_createLabel(p, p->func->code.len); 1235 1236 // Create an exit label for the body and start the body. 1237 bc_parse_createExitLabel(p, exit_idx, true); 1238 bc_lex_next(&p->l); 1239 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1240 } 1241 1242 /** 1243 * Parse a statement or token that indicates a loop exit. This includes an 1244 * actual loop exit, the break keyword, or the continue keyword. 1245 * @param p The parser. 1246 * @param type The type of exit. 1247 */ 1248 static void bc_parse_loopExit(BcParse *p, BcLexType type) { 1249 1250 size_t i; 1251 BcInstPtr *ip; 1252 1253 // Must have a loop. If we don't, that's an error. 1254 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1255 1256 // If we have a break statement... 1257 if (type == BC_LEX_KW_BREAK) { 1258 1259 // If there are no exits, something went wrong somewhere. 1260 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1261 1262 // Get the exit. 1263 i = p->exits.len - 1; 1264 ip = bc_vec_item(&p->exits, i); 1265 1266 // The condition !ip->func is true if the exit is not for a loop, so we 1267 // need to find the first actual loop exit. 1268 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--); 1269 1270 // Make sure everything is hunky dory. 1271 assert(ip != NULL && (i < p->exits.len || ip->func)); 1272 1273 // Set the index for the exit. 1274 i = ip->idx; 1275 } 1276 // If we have a continue statement or just the loop end, jump to the 1277 // condition (or update for a foor loop). 1278 else i = *((size_t*) bc_vec_top(&p->conds)); 1279 1280 // Add the unconditional jump. 1281 bc_parse_push(p, BC_INST_JUMP); 1282 bc_parse_pushIndex(p, i); 1283 1284 bc_lex_next(&p->l); 1285 } 1286 1287 /** 1288 * Parse a function (header). 1289 * @param p The parser. 1290 */ 1291 static void bc_parse_func(BcParse *p) { 1292 1293 bool comma = false, voidfn; 1294 uint16_t flags; 1295 size_t idx; 1296 1297 bc_lex_next(&p->l); 1298 1299 // Must have a name. 1300 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1301 1302 // If the name is "void", and POSIX is not on, mark as void. 1303 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME && 1304 !strcmp(p->l.str.v, "void")); 1305 1306 // We can safely do this because the expected token should not overwrite the 1307 // function name. 1308 bc_lex_next(&p->l); 1309 1310 // If we *don't* have another name, then void is the name of the function. 1311 voidfn = (voidfn && p->l.t == BC_LEX_NAME); 1312 1313 // With a void function, allow POSIX to complain and get a new token. 1314 if (voidfn) { 1315 1316 bc_parse_err(p, BC_ERR_POSIX_VOID); 1317 1318 // We can safely do this because the expected token should not overwrite 1319 // the function name. 1320 bc_lex_next(&p->l); 1321 } 1322 1323 // Must have a left paren. 1324 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1325 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1326 1327 // Make sure the functions map and vector are synchronized. 1328 assert(p->prog->fns.len == p->prog->fn_map.len); 1329 1330 // Insert the function by name into the map and vector. 1331 idx = bc_program_insertFunc(p->prog, p->l.str.v); 1332 1333 // Make sure the insert worked. 1334 assert(idx); 1335 1336 // Update the function pointer and stuff in the parser and set its void. 1337 bc_parse_updateFunc(p, idx); 1338 p->func->voidfn = voidfn; 1339 1340 bc_lex_next(&p->l); 1341 1342 // While we do not have a right paren, we are still parsing arguments. 1343 while (p->l.t != BC_LEX_RPAREN) { 1344 1345 BcType t = BC_TYPE_VAR; 1346 1347 // If we have an asterisk, we are parsing a reference argument. 1348 if (p->l.t == BC_LEX_OP_MULTIPLY) { 1349 1350 t = BC_TYPE_REF; 1351 bc_lex_next(&p->l); 1352 1353 // Let POSIX complain if necessary. 1354 bc_parse_err(p, BC_ERR_POSIX_REF); 1355 } 1356 1357 // If we don't have a name, the argument will not have a name. Barf. 1358 if (BC_ERR(p->l.t != BC_LEX_NAME)) 1359 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1360 1361 // Increment the number of parameters. 1362 p->func->nparams += 1; 1363 1364 // Copy the string in the lexer so that we can use the lexer again. 1365 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v); 1366 1367 bc_lex_next(&p->l); 1368 1369 // We are parsing an array parameter if this is true. 1370 if (p->l.t == BC_LEX_LBRACKET) { 1371 1372 // Set the array type, unless we are already parsing a reference. 1373 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY; 1374 1375 bc_lex_next(&p->l); 1376 1377 // The brackets *must* be empty. 1378 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1379 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1380 1381 bc_lex_next(&p->l); 1382 } 1383 // If we did *not* get a bracket, but we are expecting a reference, we 1384 // have a problem. 1385 else if (BC_ERR(t == BC_TYPE_REF)) 1386 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v); 1387 1388 // Test for comma and get the next token if it exists. 1389 comma = (p->l.t == BC_LEX_COMMA); 1390 if (comma) bc_lex_next(&p->l); 1391 1392 // Insert the parameter into the function. 1393 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1394 } 1395 1396 // If we have a comma, but no parameter, barf. 1397 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1398 1399 // Start the body. 1400 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER; 1401 bc_parse_startBody(p, flags); 1402 1403 bc_lex_next(&p->l); 1404 1405 // POSIX requires that a brace be on the same line as the function header. 1406 // If we don't have a brace, let POSIX throw an error. 1407 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE); 1408 } 1409 1410 /** 1411 * Parse an auto list. 1412 * @param p The parser. 1413 */ 1414 static void bc_parse_auto(BcParse *p) { 1415 1416 bool comma, one; 1417 1418 // Error if the auto keyword appeared in the wrong place. 1419 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1420 bc_lex_next(&p->l); 1421 1422 p->auto_part = comma = false; 1423 1424 // We need at least one variable or array. 1425 one = (p->l.t == BC_LEX_NAME); 1426 1427 // While we have a variable or array. 1428 while (p->l.t == BC_LEX_NAME) { 1429 1430 BcType t; 1431 1432 // Copy the name from the lexer, so we can use it again. 1433 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v); 1434 1435 bc_lex_next(&p->l); 1436 1437 // If we are parsing an array... 1438 if (p->l.t == BC_LEX_LBRACKET) { 1439 1440 t = BC_TYPE_ARRAY; 1441 1442 bc_lex_next(&p->l); 1443 1444 // The brackets *must* be empty. 1445 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1446 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1447 1448 bc_lex_next(&p->l); 1449 } 1450 else t = BC_TYPE_VAR; 1451 1452 // Test for comma and get the next token if it exists. 1453 comma = (p->l.t == BC_LEX_COMMA); 1454 if (comma) bc_lex_next(&p->l); 1455 1456 // Insert the auto into the function. 1457 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1458 } 1459 1460 // If we have a comma, but no auto, barf. 1461 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1462 1463 // If we don't have any variables or arrays, barf. 1464 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO); 1465 1466 // The auto statement should be all that's in the statement. 1467 if (BC_ERR(!bc_parse_isDelimiter(p))) 1468 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1469 } 1470 1471 /** 1472 * Parses a body. 1473 * @param p The parser. 1474 * @param brace True if a brace was encountered, false otherwise. 1475 */ 1476 static void bc_parse_body(BcParse *p, bool brace) { 1477 1478 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 1479 1480 assert(flag_ptr != NULL); 1481 assert(p->flags.len >= 2); 1482 1483 // The body flag is for when we expect a body. We got a body, so clear the 1484 // flag. 1485 *flag_ptr &= ~(BC_PARSE_FLAG_BODY); 1486 1487 // If we are inside a function, that means we just barely entered it, and 1488 // we can expect an auto list. 1489 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) { 1490 1491 // We *must* have a brace in this case. 1492 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1493 1494 p->auto_part = (p->l.t != BC_LEX_KW_AUTO); 1495 1496 if (!p->auto_part) { 1497 1498 // Make sure this is true to not get a parse error. 1499 p->auto_part = true; 1500 1501 // Since we already have the auto keyword, parse. 1502 bc_parse_auto(p); 1503 } 1504 1505 // Eat a newline. 1506 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 1507 } 1508 else { 1509 1510 // This is the easy part. 1511 size_t len = p->flags.len; 1512 1513 assert(*flag_ptr); 1514 1515 // Parse a statement. 1516 bc_parse_stmt(p); 1517 1518 // This is a very important condition to get right. If there is no 1519 // brace, and no body flag, and the flags len hasn't shrunk, then we 1520 // have a body that was not delimited by braces, so we need to end it 1521 // now, after just one statement. 1522 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len) 1523 bc_parse_endBody(p, false); 1524 } 1525 } 1526 1527 /** 1528 * Parses a statement. This is the entry point for just about everything, except 1529 * function definitions. 1530 * @param p The parser. 1531 */ 1532 static void bc_parse_stmt(BcParse *p) { 1533 1534 size_t len; 1535 uint16_t flags; 1536 BcLexType type = p->l.t; 1537 1538 // Eat newline. 1539 if (type == BC_LEX_NLINE) { 1540 bc_lex_next(&p->l); 1541 return; 1542 } 1543 1544 // Eat auto list. 1545 if (type == BC_LEX_KW_AUTO) { 1546 bc_parse_auto(p); 1547 return; 1548 } 1549 1550 // If we reach this point, no auto list is allowed. 1551 p->auto_part = false; 1552 1553 // Everything but an else needs to be taken care of here, but else is 1554 // special. 1555 if (type != BC_LEX_KW_ELSE) { 1556 1557 // After an if, no else found. 1558 if (BC_PARSE_IF_END(p)) { 1559 1560 // Clear the expectation for else, end body, and return. Returning 1561 // gives us a clean slate for parsing again. 1562 bc_parse_noElse(p); 1563 if (p->flags.len > 1 && !BC_PARSE_BRACE(p)) 1564 bc_parse_endBody(p, false); 1565 return; 1566 } 1567 // With a left brace, we are parsing a body. 1568 else if (type == BC_LEX_LBRACE) { 1569 1570 // We need to start a body if we are not expecting one yet. 1571 if (!BC_PARSE_BODY(p)) { 1572 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE); 1573 bc_lex_next(&p->l); 1574 } 1575 // If we *are* expecting a body, that body should get a brace. This 1576 // takes care of braces being on a different line than if and loop 1577 // headers. 1578 else { 1579 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE; 1580 bc_lex_next(&p->l); 1581 bc_parse_body(p, true); 1582 } 1583 1584 // If we have reached this point, we need to return for a clean 1585 // slate. 1586 return; 1587 } 1588 // This happens when we are expecting a body and get a single statement, 1589 // i.e., a body with no braces surrounding it. Returns after for a clean 1590 // slate. 1591 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) { 1592 bc_parse_body(p, false); 1593 return; 1594 } 1595 } 1596 1597 len = p->flags.len; 1598 flags = BC_PARSE_TOP_FLAG(p); 1599 1600 switch (type) { 1601 1602 // All of these are valid for expressions. 1603 case BC_LEX_OP_INC: 1604 case BC_LEX_OP_DEC: 1605 case BC_LEX_OP_MINUS: 1606 case BC_LEX_OP_BOOL_NOT: 1607 case BC_LEX_LPAREN: 1608 case BC_LEX_NAME: 1609 case BC_LEX_NUMBER: 1610 case BC_LEX_KW_IBASE: 1611 case BC_LEX_KW_LAST: 1612 case BC_LEX_KW_LENGTH: 1613 case BC_LEX_KW_OBASE: 1614 case BC_LEX_KW_SCALE: 1615 #if BC_ENABLE_EXTRA_MATH 1616 case BC_LEX_KW_SEED: 1617 #endif // BC_ENABLE_EXTRA_MATH 1618 case BC_LEX_KW_SQRT: 1619 case BC_LEX_KW_ABS: 1620 #if BC_ENABLE_EXTRA_MATH 1621 case BC_LEX_KW_IRAND: 1622 #endif // BC_ENABLE_EXTRA_MATH 1623 case BC_LEX_KW_ASCIIFY: 1624 case BC_LEX_KW_MODEXP: 1625 case BC_LEX_KW_DIVMOD: 1626 case BC_LEX_KW_READ: 1627 #if BC_ENABLE_EXTRA_MATH 1628 case BC_LEX_KW_RAND: 1629 #endif // BC_ENABLE_EXTRA_MATH 1630 case BC_LEX_KW_MAXIBASE: 1631 case BC_LEX_KW_MAXOBASE: 1632 case BC_LEX_KW_MAXSCALE: 1633 #if BC_ENABLE_EXTRA_MATH 1634 case BC_LEX_KW_MAXRAND: 1635 #endif // BC_ENABLE_EXTRA_MATH 1636 case BC_LEX_KW_LINE_LENGTH: 1637 case BC_LEX_KW_GLOBAL_STACKS: 1638 case BC_LEX_KW_LEADING_ZERO: 1639 { 1640 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr); 1641 break; 1642 } 1643 1644 case BC_LEX_KW_ELSE: 1645 { 1646 bc_parse_else(p); 1647 break; 1648 } 1649 1650 // Just eat. 1651 case BC_LEX_SCOLON: 1652 { 1653 // Do nothing. 1654 break; 1655 } 1656 1657 case BC_LEX_RBRACE: 1658 { 1659 bc_parse_endBody(p, true); 1660 break; 1661 } 1662 1663 case BC_LEX_STR: 1664 { 1665 bc_parse_str(p, BC_INST_PRINT_STR); 1666 break; 1667 } 1668 1669 case BC_LEX_KW_BREAK: 1670 case BC_LEX_KW_CONTINUE: 1671 { 1672 bc_parse_loopExit(p, p->l.t); 1673 break; 1674 } 1675 1676 case BC_LEX_KW_FOR: 1677 { 1678 bc_parse_for(p); 1679 break; 1680 } 1681 1682 case BC_LEX_KW_HALT: 1683 { 1684 bc_parse_push(p, BC_INST_HALT); 1685 bc_lex_next(&p->l); 1686 break; 1687 } 1688 1689 case BC_LEX_KW_IF: 1690 { 1691 bc_parse_if(p); 1692 break; 1693 } 1694 1695 case BC_LEX_KW_LIMITS: 1696 { 1697 // `limits` is a compile-time command, so execute it right away. 1698 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT); 1699 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS); 1700 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW); 1701 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX); 1702 bc_vm_printf("\n"); 1703 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE); 1704 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM); 1705 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE); 1706 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING); 1707 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME); 1708 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM); 1709 #if BC_ENABLE_EXTRA_MATH 1710 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND); 1711 #endif // BC_ENABLE_EXTRA_MATH 1712 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP); 1713 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS); 1714 1715 bc_lex_next(&p->l); 1716 1717 break; 1718 } 1719 1720 case BC_LEX_KW_STREAM: 1721 case BC_LEX_KW_PRINT: 1722 { 1723 bc_parse_print(p, type); 1724 break; 1725 } 1726 1727 case BC_LEX_KW_QUIT: 1728 { 1729 // Quit is a compile-time command. We don't exit directly, so the vm 1730 // can clean up. 1731 vm.status = BC_STATUS_QUIT; 1732 BC_JMP; 1733 break; 1734 } 1735 1736 case BC_LEX_KW_RETURN: 1737 { 1738 bc_parse_return(p); 1739 break; 1740 } 1741 1742 case BC_LEX_KW_WHILE: 1743 { 1744 bc_parse_while(p); 1745 break; 1746 } 1747 1748 default: 1749 { 1750 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1751 } 1752 } 1753 1754 // If the flags did not change, we expect a delimiter. 1755 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) { 1756 if (BC_ERR(!bc_parse_isDelimiter(p))) 1757 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1758 } 1759 1760 // Make sure semicolons are eaten. 1761 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l); 1762 1763 // POSIX's grammar does not allow a function definition after a semicolon 1764 // without a newline, so check specifically for that case and error if 1765 // the POSIX standard flag is set. 1766 if (p->l.last == BC_LEX_SCOLON && p->l.t == BC_LEX_KW_DEFINE && BC_IS_POSIX) 1767 { 1768 bc_parse_err(p, BC_ERR_POSIX_FUNC_AFTER_SEMICOLON); 1769 } 1770 } 1771 1772 void bc_parse_parse(BcParse *p) { 1773 1774 assert(p); 1775 1776 BC_SETJMP_LOCKED(exit); 1777 1778 // We should not let an EOF get here unless some partial parse was not 1779 // completed, in which case, it's the user's fault. 1780 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF); 1781 1782 // Functions need special parsing. 1783 else if (p->l.t == BC_LEX_KW_DEFINE) { 1784 if (BC_ERR(BC_PARSE_NO_EXEC(p))) { 1785 bc_parse_endif(p); 1786 if (BC_ERR(BC_PARSE_NO_EXEC(p))) 1787 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1788 } 1789 bc_parse_func(p); 1790 } 1791 1792 // Otherwise, parse a normal statement. 1793 else bc_parse_stmt(p); 1794 1795 exit: 1796 1797 // We need to reset on error. 1798 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig))) 1799 bc_parse_reset(p); 1800 1801 BC_LONGJMP_CONT; 1802 BC_SIG_MAYLOCK; 1803 } 1804 1805 /** 1806 * Parse an expression. This is the actual implementation of the Shunting-Yard 1807 * Algorithm. 1808 * @param p The parser. 1809 * @param flags The flags for what is valid in the expression. 1810 * @param next A set of tokens for what is valid *after* the expression. 1811 * @return A parse status. In some places, an empty expression is an 1812 * error, and sometimes, it is required. This allows this function 1813 * to tell the caller if the expression was empty and let the 1814 * caller handle it. 1815 */ 1816 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 1817 BcParseNext next) 1818 { 1819 BcInst prev = BC_INST_PRINT; 1820 uchar inst = BC_INST_INVALID; 1821 BcLexType top, t; 1822 size_t nexprs, ops_bgn; 1823 uint32_t i, nparens, nrelops; 1824 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign; 1825 1826 // One of these *must* be true. 1827 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL)); 1828 1829 // These are set very carefully. In fact, controlling the values of these 1830 // locals is the biggest part of making this work. ops_bgn especially is 1831 // important because it marks where the operator stack begins for *this* 1832 // invocation of this function. That's because bc_parse_expr_err() is 1833 // recursive (the Shunting-Yard Algorithm is most easily expressed 1834 // recursively when parsing subexpressions), and each invocation needs to 1835 // know where to stop. 1836 // 1837 // - nparens is the number of left parens without matches. 1838 // - nrelops is the number of relational operators that appear in the expr. 1839 // - nexprs is the number of unused expressions. 1840 // - rprn is a right paren encountered last. 1841 // - done means the expression has been fully parsed. 1842 // - get_token is true when a token is needed at the end of an iteration. 1843 // - assign is true when an assignment statement was parsed last. 1844 // - incdec is true when the previous operator was an inc or dec operator. 1845 // - can_assign is true when an assignemnt is valid. 1846 // - bin_last is true when the previous instruction was a binary operator. 1847 t = p->l.t; 1848 pfirst = (p->l.t == BC_LEX_LPAREN); 1849 nparens = nrelops = 0; 1850 nexprs = 0; 1851 ops_bgn = p->ops.len; 1852 rprn = done = get_token = assign = incdec = can_assign = false; 1853 bin_last = true; 1854 1855 // We want to eat newlines if newlines are not a valid ending token. 1856 // This is for spacing in things like for loop headers. 1857 if (!(flags & BC_PARSE_NOREAD)) { 1858 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l); 1859 } 1860 1861 // This is the Shunting-Yard algorithm loop. 1862 for (; !done && BC_PARSE_EXPR(t); t = p->l.t) 1863 { 1864 switch (t) { 1865 1866 case BC_LEX_OP_INC: 1867 case BC_LEX_OP_DEC: 1868 { 1869 // These operators can only be used with items that can be 1870 // assigned to. 1871 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1872 1873 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags); 1874 1875 rprn = get_token = bin_last = false; 1876 incdec = true; 1877 flags &= ~(BC_PARSE_ARRAY); 1878 1879 break; 1880 } 1881 1882 #if BC_ENABLE_EXTRA_MATH 1883 case BC_LEX_OP_TRUNC: 1884 { 1885 // The previous token must have been a leaf expression, or the 1886 // operator is in the wrong place. 1887 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn))) 1888 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1889 1890 // I can just add the instruction because 1891 // negative will already be taken care of. 1892 bc_parse_push(p, BC_INST_TRUNC); 1893 1894 rprn = can_assign = incdec = false; 1895 get_token = true; 1896 flags &= ~(BC_PARSE_ARRAY); 1897 1898 break; 1899 } 1900 #endif // BC_ENABLE_EXTRA_MATH 1901 1902 case BC_LEX_OP_MINUS: 1903 { 1904 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs); 1905 1906 rprn = get_token = can_assign = false; 1907 1908 // This is true if it was a binary operator last. 1909 bin_last = (prev == BC_INST_MINUS); 1910 if (bin_last) incdec = false; 1911 1912 flags &= ~(BC_PARSE_ARRAY); 1913 1914 break; 1915 } 1916 1917 // All of this group, including the fallthrough, is to parse binary 1918 // operators. 1919 case BC_LEX_OP_ASSIGN_POWER: 1920 case BC_LEX_OP_ASSIGN_MULTIPLY: 1921 case BC_LEX_OP_ASSIGN_DIVIDE: 1922 case BC_LEX_OP_ASSIGN_MODULUS: 1923 case BC_LEX_OP_ASSIGN_PLUS: 1924 case BC_LEX_OP_ASSIGN_MINUS: 1925 #if BC_ENABLE_EXTRA_MATH 1926 case BC_LEX_OP_ASSIGN_PLACES: 1927 case BC_LEX_OP_ASSIGN_LSHIFT: 1928 case BC_LEX_OP_ASSIGN_RSHIFT: 1929 #endif // BC_ENABLE_EXTRA_MATH 1930 case BC_LEX_OP_ASSIGN: 1931 { 1932 // We need to make sure the assignment is valid. 1933 if (!BC_PARSE_INST_VAR(prev)) 1934 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1935 } 1936 // Fallthrough. 1937 BC_FALLTHROUGH 1938 1939 case BC_LEX_OP_POWER: 1940 case BC_LEX_OP_MULTIPLY: 1941 case BC_LEX_OP_DIVIDE: 1942 case BC_LEX_OP_MODULUS: 1943 case BC_LEX_OP_PLUS: 1944 #if BC_ENABLE_EXTRA_MATH 1945 case BC_LEX_OP_PLACES: 1946 case BC_LEX_OP_LSHIFT: 1947 case BC_LEX_OP_RSHIFT: 1948 #endif // BC_ENABLE_EXTRA_MATH 1949 case BC_LEX_OP_REL_EQ: 1950 case BC_LEX_OP_REL_LE: 1951 case BC_LEX_OP_REL_GE: 1952 case BC_LEX_OP_REL_NE: 1953 case BC_LEX_OP_REL_LT: 1954 case BC_LEX_OP_REL_GT: 1955 case BC_LEX_OP_BOOL_NOT: 1956 case BC_LEX_OP_BOOL_OR: 1957 case BC_LEX_OP_BOOL_AND: 1958 { 1959 // This is true if the operator if the token is a prefix 1960 // operator. This is only for boolean not. 1961 if (BC_PARSE_OP_PREFIX(t)) { 1962 1963 // Prefix operators are only allowed after binary operators 1964 // or prefix operators. 1965 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last))) 1966 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1967 } 1968 // If we execute the else, that means we have a binary operator. 1969 // If the previous operator was a prefix or a binary operator, 1970 // then a binary operator is not allowed. 1971 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last)) 1972 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1973 1974 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT); 1975 prev = BC_PARSE_TOKEN_INST(t); 1976 1977 bc_parse_operator(p, t, ops_bgn, &nexprs); 1978 1979 rprn = incdec = can_assign = false; 1980 get_token = true; 1981 bin_last = !BC_PARSE_OP_PREFIX(t); 1982 flags &= ~(BC_PARSE_ARRAY); 1983 1984 break; 1985 } 1986 1987 case BC_LEX_LPAREN: 1988 { 1989 // A left paren is *not* allowed right after a leaf expr. 1990 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 1991 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1992 1993 nparens += 1; 1994 rprn = incdec = can_assign = false; 1995 get_token = true; 1996 1997 // Push the paren onto the operator stack. 1998 bc_vec_push(&p->ops, &t); 1999 2000 break; 2001 } 2002 2003 case BC_LEX_RPAREN: 2004 { 2005 // This needs to be a status. The error is handled in 2006 // bc_parse_expr_status(). 2007 if (BC_ERR(p->l.last == BC_LEX_LPAREN)) 2008 return BC_PARSE_STATUS_EMPTY_EXPR; 2009 2010 // The right paren must not come after a prefix or binary 2011 // operator. 2012 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev))) 2013 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2014 2015 // If there are no parens left, we are done, but we need another 2016 // token. 2017 if (!nparens) { 2018 done = true; 2019 get_token = false; 2020 break; 2021 } 2022 2023 nparens -= 1; 2024 rprn = true; 2025 get_token = bin_last = incdec = false; 2026 2027 bc_parse_rightParen(p, &nexprs); 2028 2029 break; 2030 } 2031 2032 case BC_LEX_STR: 2033 { 2034 // POSIX only allows strings alone. 2035 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING); 2036 2037 // A string is a leaf and cannot come right after a leaf. 2038 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2039 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2040 2041 bc_parse_addString(p); 2042 2043 get_token = true; 2044 bin_last = rprn = false; 2045 nexprs += 1; 2046 2047 break; 2048 } 2049 2050 case BC_LEX_NAME: 2051 { 2052 // A name is a leaf and cannot come right after a leaf. 2053 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2054 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2055 2056 get_token = bin_last = false; 2057 2058 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL); 2059 2060 rprn = (prev == BC_INST_CALL); 2061 nexprs += 1; 2062 flags &= ~(BC_PARSE_ARRAY); 2063 2064 break; 2065 } 2066 2067 case BC_LEX_NUMBER: 2068 { 2069 // A number is a leaf and cannot come right after a leaf. 2070 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2071 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2072 2073 // The number instruction is pushed in here. 2074 bc_parse_number(p); 2075 2076 nexprs += 1; 2077 prev = BC_INST_NUM; 2078 get_token = true; 2079 rprn = bin_last = can_assign = false; 2080 flags &= ~(BC_PARSE_ARRAY); 2081 2082 break; 2083 } 2084 2085 case BC_LEX_KW_IBASE: 2086 case BC_LEX_KW_LAST: 2087 case BC_LEX_KW_OBASE: 2088 #if BC_ENABLE_EXTRA_MATH 2089 case BC_LEX_KW_SEED: 2090 #endif // BC_ENABLE_EXTRA_MATH 2091 { 2092 // All of these are leaves and cannot come right after a leaf. 2093 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2094 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2095 2096 prev = t - BC_LEX_KW_LAST + BC_INST_LAST; 2097 bc_parse_push(p, prev); 2098 2099 get_token = can_assign = true; 2100 rprn = bin_last = false; 2101 nexprs += 1; 2102 flags &= ~(BC_PARSE_ARRAY); 2103 2104 break; 2105 } 2106 2107 case BC_LEX_KW_LENGTH: 2108 case BC_LEX_KW_SQRT: 2109 case BC_LEX_KW_ABS: 2110 #if BC_ENABLE_EXTRA_MATH 2111 case BC_LEX_KW_IRAND: 2112 #endif // BC_ENABLE_EXTRA_MATH 2113 case BC_LEX_KW_ASCIIFY: 2114 { 2115 // All of these are leaves and cannot come right after a leaf. 2116 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2117 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2118 2119 bc_parse_builtin(p, t, flags, &prev); 2120 2121 rprn = get_token = bin_last = incdec = can_assign = false; 2122 nexprs += 1; 2123 flags &= ~(BC_PARSE_ARRAY); 2124 2125 break; 2126 } 2127 2128 case BC_LEX_KW_READ: 2129 #if BC_ENABLE_EXTRA_MATH 2130 case BC_LEX_KW_RAND: 2131 #endif // BC_ENABLE_EXTRA_MATH 2132 case BC_LEX_KW_MAXIBASE: 2133 case BC_LEX_KW_MAXOBASE: 2134 case BC_LEX_KW_MAXSCALE: 2135 #if BC_ENABLE_EXTRA_MATH 2136 case BC_LEX_KW_MAXRAND: 2137 #endif // BC_ENABLE_EXTRA_MATH 2138 case BC_LEX_KW_LINE_LENGTH: 2139 case BC_LEX_KW_GLOBAL_STACKS: 2140 case BC_LEX_KW_LEADING_ZERO: 2141 { 2142 // All of these are leaves and cannot come right after a leaf. 2143 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2144 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2145 2146 // Error if we have read and it's not allowed. 2147 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD)) 2148 bc_parse_err(p, BC_ERR_EXEC_REC_READ); 2149 2150 prev = t - BC_LEX_KW_READ + BC_INST_READ; 2151 bc_parse_noArgBuiltin(p, prev); 2152 2153 rprn = get_token = bin_last = incdec = can_assign = false; 2154 nexprs += 1; 2155 flags &= ~(BC_PARSE_ARRAY); 2156 2157 break; 2158 } 2159 2160 case BC_LEX_KW_SCALE: 2161 { 2162 // This is a leaf and cannot come right after a leaf. 2163 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2164 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2165 2166 // Scale needs special work because it can be a variable *or* a 2167 // function. 2168 bc_parse_scale(p, &prev, &can_assign, flags); 2169 2170 rprn = get_token = bin_last = false; 2171 nexprs += 1; 2172 flags &= ~(BC_PARSE_ARRAY); 2173 2174 break; 2175 } 2176 2177 case BC_LEX_KW_MODEXP: 2178 case BC_LEX_KW_DIVMOD: 2179 { 2180 // This is a leaf and cannot come right after a leaf. 2181 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2182 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2183 2184 bc_parse_builtin3(p, t, flags, &prev); 2185 2186 rprn = get_token = bin_last = incdec = can_assign = false; 2187 nexprs += 1; 2188 flags &= ~(BC_PARSE_ARRAY); 2189 2190 break; 2191 } 2192 2193 default: 2194 { 2195 #ifndef NDEBUG 2196 // We should never get here, even in debug builds. 2197 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 2198 break; 2199 #endif // NDEBUG 2200 } 2201 } 2202 2203 if (get_token) bc_lex_next(&p->l); 2204 } 2205 2206 // Now that we have parsed the expression, we need to empty the operator 2207 // stack. 2208 while (p->ops.len > ops_bgn) { 2209 2210 top = BC_PARSE_TOP_OP(p); 2211 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN; 2212 2213 // There should not be *any* parens on the stack anymore. 2214 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN)) 2215 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2216 2217 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 2218 2219 // Adjust the number of unused expressions. 2220 nexprs -= !BC_PARSE_OP_PREFIX(top); 2221 bc_vec_pop(&p->ops); 2222 2223 incdec = false; 2224 } 2225 2226 // There must be only one expression at the top. 2227 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR); 2228 2229 // Check that the next token is correct. 2230 for (i = 0; i < next.len && t != next.tokens[i]; ++i); 2231 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p))) 2232 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2233 2234 // Check that POSIX would be happy with the number of relational operators. 2235 if (!(flags & BC_PARSE_REL) && nrelops) 2236 bc_parse_err(p, BC_ERR_POSIX_REL_POS); 2237 else if ((flags & BC_PARSE_REL) && nrelops > 1) 2238 bc_parse_err(p, BC_ERR_POSIX_MULTIREL); 2239 2240 // If this is true, then we might be in a situation where we don't print. 2241 // We would want to have the increment/decrement operator not make an extra 2242 // copy if it's not necessary. 2243 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) { 2244 2245 // We have the easy case if the last operator was an assignment 2246 // operator. 2247 if (assign) { 2248 inst = *((uchar*) bc_vec_top(&p->func->code)); 2249 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER); 2250 incdec = false; 2251 } 2252 // If we have an inc/dec operator and we are *not* printing, implement 2253 // the optimization to get rid of the extra copy. 2254 else if (incdec && !(flags & BC_PARSE_PRINT)) { 2255 inst = *((uchar*) bc_vec_top(&p->func->code)); 2256 incdec = (inst <= BC_INST_DEC); 2257 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC && 2258 inst != BC_INST_ASSIGN_PLUS); 2259 } 2260 2261 // This condition allows us to change the previous assignment 2262 // instruction (which does a copy) for a NO_VAL version, which does not. 2263 // This condition is set if either of the above if statements ends up 2264 // being true. 2265 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL && 2266 inst <= BC_INST_ASSIGN_NO_VAL) 2267 { 2268 // Pop the previous assignment instruction and push a new one. 2269 // Inc/dec needs the extra instruction because it is now a binary 2270 // operator and needs a second operand. 2271 bc_vec_pop(&p->func->code); 2272 if (incdec) bc_parse_push(p, BC_INST_ONE); 2273 bc_parse_push(p, inst); 2274 } 2275 } 2276 2277 // If we might have to print... 2278 if ((flags & BC_PARSE_PRINT)) { 2279 2280 // With a paren first or the last operator not being an assignment, we 2281 // *do* want to print. 2282 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT); 2283 } 2284 // We need to make sure to push a pop instruction for assignment statements 2285 // that will not print. The print will pop, but without it, we need to pop. 2286 else if (!(flags & BC_PARSE_NEEDVAL) && 2287 (inst < BC_INST_ASSIGN_POWER_NO_VAL || 2288 inst > BC_INST_ASSIGN_NO_VAL)) 2289 { 2290 bc_parse_push(p, BC_INST_POP); 2291 } 2292 2293 // We want to eat newlines if newlines are not a valid ending token. 2294 // This is for spacing in things like for loop headers. 2295 // 2296 // Yes, this is one case where I reuse a variable for a different purpose; 2297 // in this case, incdec being true now means that newlines are not valid. 2298 for (incdec = true, i = 0; i < next.len && incdec; ++i) 2299 incdec = (next.tokens[i] != BC_LEX_NLINE); 2300 if (incdec) { 2301 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 2302 } 2303 2304 return BC_PARSE_STATUS_SUCCESS; 2305 } 2306 2307 /** 2308 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets 2309 * an empty expression. 2310 * @param p The parser. 2311 * @param flags The flags for what is valid in the expression. 2312 * @param next A set of tokens for what is valid *after* the expression. 2313 */ 2314 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) { 2315 2316 BcParseStatus s = bc_parse_expr_err(p, flags, next); 2317 2318 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR)) 2319 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR); 2320 } 2321 2322 void bc_parse_expr(BcParse *p, uint8_t flags) { 2323 assert(p); 2324 bc_parse_expr_status(p, flags, bc_parse_next_read); 2325 } 2326 #endif // BC_ENABLED 2327