1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * The parser for bc. 33 * 34 */ 35 36 #if BC_ENABLED 37 38 #include <assert.h> 39 #include <stdbool.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include <setjmp.h> 44 45 #include <bc.h> 46 #include <num.h> 47 #include <vm.h> 48 49 // Before you embark on trying to understand this code, have you read the 50 // Development manual (manuals/development.md) and the comment in include/bc.h 51 // yet? No? Do that first. I'm serious. 52 // 53 // The reason is because this file holds the most sensitive and finicky code in 54 // the entire codebase. Even getting history to work on Windows was nothing 55 // compared to this. This is where dreams go to die, where dragons live, and 56 // from which Ken Thompson himself would flee. 57 58 static void bc_parse_else(BcParse *p); 59 static void bc_parse_stmt(BcParse *p); 60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 61 BcParseNext next); 62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next); 63 64 /** 65 * Returns true if an instruction could only have come from a "leaf" expression. 66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF(). 67 * @param t The instruction to test. 68 */ 69 static bool bc_parse_inst_isLeaf(BcInst t) { 70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) || 71 #if BC_ENABLE_EXTRA_MATH 72 t == BC_INST_TRUNC || 73 #endif // BC_ENABLE_EXTRA_MATH 74 t <= BC_INST_DEC; 75 } 76 77 /** 78 * Returns true if the *previous* token was a delimiter. A delimiter is anything 79 * that can legally end a statement. In bc's case, it could be a newline, a 80 * semicolon, and a brace in certain cases. 81 * @param p The parser. 82 */ 83 static bool bc_parse_isDelimiter(const BcParse *p) { 84 85 BcLexType t = p->l.t; 86 bool good; 87 88 // If it's an obvious delimiter, say so. 89 if (BC_PARSE_DELIMITER(t)) return true; 90 91 good = false; 92 93 // If the current token is a keyword, then...beware. That means that we need 94 // to check for a "dangling" else, where there was no brace-delimited block 95 // on the previous if. 96 if (t == BC_LEX_KW_ELSE) { 97 98 size_t i; 99 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE; 100 101 // As long as going up the stack is valid for a dangling else, keep on. 102 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) { 103 104 fptr = bc_vec_item_rev(&p->flags, i); 105 flags = *fptr; 106 107 // If we need a brace and don't have one, then we don't have a 108 // delimiter. 109 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE) 110 return false; 111 } 112 113 // Oh, and we had also better have an if statement somewhere. 114 good = ((flags & BC_PARSE_FLAG_IF) != 0); 115 } 116 else if (t == BC_LEX_RBRACE) { 117 118 size_t i; 119 120 // Since we have a brace, we need to just check if a brace was needed. 121 for (i = 0; !good && i < p->flags.len; ++i) { 122 uint16_t *fptr = bc_vec_item_rev(&p->flags, i); 123 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0); 124 } 125 } 126 127 return good; 128 } 129 130 /** 131 * Sets a previously defined exit label. What are labels? See the bc Parsing 132 * section of the Development manual (manuals/development.md). 133 * @param p The parser. 134 */ 135 static void bc_parse_setLabel(BcParse *p) { 136 137 BcFunc *func = p->func; 138 BcInstPtr *ip = bc_vec_top(&p->exits); 139 size_t *label; 140 141 assert(func == bc_vec_item(&p->prog->fns, p->fidx)); 142 143 // Set the preallocated label to the correct index. 144 label = bc_vec_item(&func->labels, ip->idx); 145 *label = func->code.len; 146 147 // Now, we don't need the exit label; it is done. 148 bc_vec_pop(&p->exits); 149 } 150 151 /** 152 * Creates a label and sets it to idx. If this is an exit label, then idx is 153 * actually invalid, but it doesn't matter because it will be fixed by 154 * bc_parse_setLabel() later. 155 * @param p The parser. 156 * @param idx The index of the label. 157 */ 158 static void bc_parse_createLabel(BcParse *p, size_t idx) { 159 bc_vec_push(&p->func->labels, &idx); 160 } 161 162 /** 163 * Creates a conditional label. Unlike an exit label, this label is set at 164 * creation time because it comes *before* the code that will target it. 165 * @param p The parser. 166 * @param idx The index of the label. 167 */ 168 static void bc_parse_createCondLabel(BcParse *p, size_t idx) { 169 bc_parse_createLabel(p, p->func->code.len); 170 bc_vec_push(&p->conds, &idx); 171 } 172 173 /* 174 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why 175 * create a label to be filled in later? Because exit labels are meant to be 176 * targeted by code that comes *before* the label. Since we have to parse that 177 * code first, and don't know how long it will be, we need to just make sure to 178 * reserve a slot to be filled in later when we know. 179 * 180 * By the way, this uses BcInstPtr because it was convenient. The field idx 181 * holds the index, and the field func holds the loop boolean. 182 * 183 * @param p The parser. 184 * @param idx The index of the label's position. 185 * @param loop True if the exit label is for a loop or not. 186 */ 187 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) { 188 189 BcInstPtr ip; 190 191 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx)); 192 193 ip.func = loop; 194 ip.idx = idx; 195 ip.len = 0; 196 197 bc_vec_push(&p->exits, &ip); 198 bc_parse_createLabel(p, SIZE_MAX); 199 } 200 201 /** 202 * Pops the correct operators off of the operator stack based on the current 203 * operator. This is because of the Shunting-Yard algorithm. Lower prec means 204 * higher precedence. 205 * @param p The parser. 206 * @param type The operator. 207 * @param start The previous start of the operator stack. For more 208 * information, see the bc Parsing section of the Development 209 * manual (manuals/development.md). 210 * @param nexprs A pointer to the current number of expressions that have not 211 * been consumed yet. This is an IN and OUT parameter. 212 */ 213 static void bc_parse_operator(BcParse *p, BcLexType type, 214 size_t start, size_t *nexprs) 215 { 216 BcLexType t; 217 uchar l, r = BC_PARSE_OP_PREC(type); 218 uchar left = BC_PARSE_OP_LEFT(type); 219 220 // While we haven't hit the stop point yet. 221 while (p->ops.len > start) { 222 223 // Get the top operator. 224 t = BC_PARSE_TOP_OP(p); 225 226 // If it's a right paren, we have reached the end of whatever expression 227 // this is no matter what. 228 if (t == BC_LEX_LPAREN) break; 229 230 // Break for precedence. Precedence operates differently on left and 231 // right associativity, by the way. A left associative operator that 232 // matches the current precedence should take priority, but a right 233 // associative operator should not. 234 l = BC_PARSE_OP_PREC(t); 235 if (l >= r && (l != r || !left)) break; 236 237 // Do the housekeeping. In particular, make sure to note that one 238 // expression was consumed. (Two were, but another was added.) 239 bc_parse_push(p, BC_PARSE_TOKEN_INST(t)); 240 bc_vec_pop(&p->ops); 241 *nexprs -= !BC_PARSE_OP_PREFIX(t); 242 } 243 244 bc_vec_push(&p->ops, &type); 245 } 246 247 /** 248 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on 249 * the operator stack. But before that, it needs to consume whatever operators 250 * there are until it hits a left paren. 251 * @param p The parser. 252 * @param nexprs A pointer to the current number of expressions that have not 253 * been consumed yet. This is an IN and OUT parameter. 254 */ 255 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) { 256 257 BcLexType top; 258 259 // Consume operators until a left paren. 260 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) { 261 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 262 bc_vec_pop(&p->ops); 263 *nexprs -= !BC_PARSE_OP_PREFIX(top); 264 } 265 266 // We need to pop the left paren as well. 267 bc_vec_pop(&p->ops); 268 269 // Oh, and we also want the next token. 270 bc_lex_next(&p->l); 271 } 272 273 /** 274 * Parses function arguments. 275 * @param p The parser. 276 * @param flags Flags restricting what kind of expressions the arguments can 277 * be. 278 */ 279 static void bc_parse_args(BcParse *p, uint8_t flags) { 280 281 bool comma = false; 282 size_t nargs; 283 284 bc_lex_next(&p->l); 285 286 // Print and comparison operators not allowed. Well, comparison operators 287 // only for POSIX. But we do allow arrays, and we *must* get a value. 288 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 289 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL); 290 291 // Count the arguments and parse them. 292 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) { 293 294 bc_parse_expr_status(p, flags, bc_parse_next_arg); 295 296 comma = (p->l.t == BC_LEX_COMMA); 297 if (comma) bc_lex_next(&p->l); 298 } 299 300 // An ending comma is FAIL. 301 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 302 303 // Now do the call with the number of arguments. 304 bc_parse_push(p, BC_INST_CALL); 305 bc_parse_pushIndex(p, nargs); 306 } 307 308 /** 309 * Parses a function call. 310 * @param p The parser. 311 * @param flags Flags restricting what kind of expressions the arguments can 312 * be. 313 */ 314 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) { 315 316 size_t idx; 317 318 bc_parse_args(p, flags); 319 320 // We just assert this because bc_parse_args() should 321 // ensure that the next token is what it should be. 322 assert(p->l.t == BC_LEX_RPAREN); 323 324 // We cannot use bc_program_insertFunc() here 325 // because it will overwrite an existing function. 326 idx = bc_map_index(&p->prog->fn_map, name); 327 328 // The function does not exist yet. Create a space for it. If the user does 329 // not define it, it's a *runtime* error, not a parse error. 330 if (idx == BC_VEC_INVALID_IDX) { 331 332 BC_SIG_LOCK; 333 334 idx = bc_program_insertFunc(p->prog, name); 335 336 BC_SIG_UNLOCK; 337 338 assert(idx != BC_VEC_INVALID_IDX); 339 340 // Make sure that this pointer was not invalidated. 341 p->func = bc_vec_item(&p->prog->fns, p->fidx); 342 } 343 // The function exists, so set the right function index. 344 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx; 345 346 bc_parse_pushIndex(p, idx); 347 348 // Make sure to get the next token. 349 bc_lex_next(&p->l); 350 } 351 352 /** 353 * Parses a name/identifier-based expression. It could be a variable, an array 354 * element, an array itself (for function arguments), a function call, etc. 355 * 356 */ 357 static void bc_parse_name(BcParse *p, BcInst *type, 358 bool *can_assign, uint8_t flags) 359 { 360 char *name; 361 362 BC_SIG_LOCK; 363 364 // We want a copy of the name since the lexer might overwrite its copy. 365 name = bc_vm_strdup(p->l.str.v); 366 367 BC_SETJMP_LOCKED(err); 368 369 BC_SIG_UNLOCK; 370 371 // We need the next token to see if it's just a variable or something more. 372 bc_lex_next(&p->l); 373 374 // Array element or array. 375 if (p->l.t == BC_LEX_LBRACKET) { 376 377 bc_lex_next(&p->l); 378 379 // Array only. This has to be a function parameter. 380 if (p->l.t == BC_LEX_RBRACKET) { 381 382 // Error if arrays are not allowed. 383 if (BC_ERR(!(flags & BC_PARSE_ARRAY))) 384 bc_parse_err(p, BC_ERR_PARSE_EXPR); 385 386 *type = BC_INST_ARRAY; 387 *can_assign = false; 388 } 389 else { 390 391 // If we are here, we have an array element. We need to set the 392 // expression parsing flags. 393 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) | 394 BC_PARSE_NEEDVAL; 395 396 bc_parse_expr_status(p, flags2, bc_parse_next_elem); 397 398 // The next token *must* be a right bracket. 399 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 400 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 401 402 *type = BC_INST_ARRAY_ELEM; 403 *can_assign = true; 404 } 405 406 // Make sure to get the next token. 407 bc_lex_next(&p->l); 408 409 // Push the instruction and the name of the identifier. 410 bc_parse_push(p, *type); 411 bc_parse_pushName(p, name, false); 412 } 413 else if (p->l.t == BC_LEX_LPAREN) { 414 415 // We are parsing a function call; error if not allowed. 416 if (BC_ERR(flags & BC_PARSE_NOCALL)) 417 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 418 419 *type = BC_INST_CALL; 420 *can_assign = false; 421 422 bc_parse_call(p, name, flags); 423 } 424 else { 425 // Just a variable. 426 *type = BC_INST_VAR; 427 *can_assign = true; 428 bc_parse_push(p, BC_INST_VAR); 429 bc_parse_pushName(p, name, true); 430 } 431 432 err: 433 // Need to make sure to unallocate the name. 434 BC_SIG_MAYLOCK; 435 free(name); 436 BC_LONGJMP_CONT; 437 } 438 439 /** 440 * Parses a builtin function that takes no arguments. This includes read(), 441 * rand(), maxibase(), maxobase(), maxscale(), and maxrand(). 442 * @param p The parser. 443 * @param inst The instruction corresponding to the builtin. 444 */ 445 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) { 446 447 // Must have a left paren. 448 bc_lex_next(&p->l); 449 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 450 451 // Must have a right paren. 452 bc_lex_next(&p->l); 453 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 454 455 bc_parse_push(p, inst); 456 457 bc_lex_next(&p->l); 458 } 459 460 /** 461 * Parses a builtin function that takes 1 argument. This includes length(), 462 * sqrt(), abs(), scale(), and irand(). 463 * @param p The parser. 464 * @param type The lex token. 465 * @param flags The expression parsing flags for parsing the argument. 466 * @param prev An out parameter; the previous instruction pointer. 467 */ 468 static void bc_parse_builtin(BcParse *p, BcLexType type, 469 uint8_t flags, BcInst *prev) 470 { 471 // Must have a left paren. 472 bc_lex_next(&p->l); 473 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 474 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 475 476 bc_lex_next(&p->l); 477 478 // Change the flags as needed for parsing the argument. 479 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 480 flags |= BC_PARSE_NEEDVAL; 481 482 // Since length can take arrays, we need to specially add that flag. 483 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY; 484 485 bc_parse_expr_status(p, flags, bc_parse_next_rel); 486 487 // Must have a right paren. 488 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 489 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 490 491 // Adjust previous based on the token and push it. 492 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH; 493 bc_parse_push(p, *prev); 494 495 bc_lex_next(&p->l); 496 } 497 498 /** 499 * Parses a builtin function that takes 3 arguments. This includes modexp() and 500 * divmod(). 501 */ 502 static void bc_parse_builtin3(BcParse *p, BcLexType type, 503 uint8_t flags, BcInst *prev) 504 { 505 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD); 506 507 // Must have a left paren. 508 bc_lex_next(&p->l); 509 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 510 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 511 512 bc_lex_next(&p->l); 513 514 // Change the flags as needed for parsing the argument. 515 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 516 flags |= BC_PARSE_NEEDVAL; 517 518 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 519 520 // Must have a comma. 521 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 522 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 523 524 bc_lex_next(&p->l); 525 526 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 527 528 // Must have a comma. 529 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 530 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 531 532 bc_lex_next(&p->l); 533 534 // If it is a divmod, parse an array name. Otherwise, just parse another 535 // expression. 536 if (type == BC_LEX_KW_DIVMOD) { 537 538 // Must have a name. 539 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 540 541 // This is safe because the next token should not overwrite the name. 542 bc_lex_next(&p->l); 543 544 // Must have a left bracket. 545 if (BC_ERR(p->l.t != BC_LEX_LBRACKET)) 546 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 547 548 // This is safe because the next token should not overwrite the name. 549 bc_lex_next(&p->l); 550 551 // Must have a right bracket. 552 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 553 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 554 555 // This is safe because the next token should not overwrite the name. 556 bc_lex_next(&p->l); 557 } 558 else bc_parse_expr_status(p, flags, bc_parse_next_rel); 559 560 // Must have a right paren. 561 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 562 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 563 564 // Adjust previous based on the token and push it. 565 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP; 566 bc_parse_push(p, *prev); 567 568 // If we have divmod, we need to assign the modulus to the array element, so 569 // we need to push the instructions for doing so. 570 if (type == BC_LEX_KW_DIVMOD) { 571 572 // The zeroth element. 573 bc_parse_push(p, BC_INST_ZERO); 574 bc_parse_push(p, BC_INST_ARRAY_ELEM); 575 576 // Push the array. 577 bc_parse_pushName(p, p->l.str.v, false); 578 579 // Swap them and assign. After this, the top item on the stack should 580 // be the quotient. 581 bc_parse_push(p, BC_INST_SWAP); 582 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL); 583 } 584 585 bc_lex_next(&p->l); 586 } 587 588 /** 589 * Parses the scale keyword. This is special because scale can be a value or a 590 * builtin function. 591 * @param p The parser. 592 * @param type An out parameter; the instruction for the parse. 593 * @param can_assign An out parameter; whether the expression can be assigned 594 * to. 595 * @param flags The expression parsing flags for parsing a scale() arg. 596 */ 597 static void bc_parse_scale(BcParse *p, BcInst *type, 598 bool *can_assign, uint8_t flags) 599 { 600 bc_lex_next(&p->l); 601 602 // Without the left paren, it's just the keyword. 603 if (p->l.t != BC_LEX_LPAREN) { 604 605 // Set, push, and return. 606 *type = BC_INST_SCALE; 607 *can_assign = true; 608 bc_parse_push(p, BC_INST_SCALE); 609 return; 610 } 611 612 // Handle the scale function. 613 *type = BC_INST_SCALE_FUNC; 614 *can_assign = false; 615 616 // Once again, adjust the flags. 617 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 618 flags |= BC_PARSE_NEEDVAL; 619 620 bc_lex_next(&p->l); 621 622 bc_parse_expr_status(p, flags, bc_parse_next_rel); 623 624 // Must have a right paren. 625 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 626 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 627 628 bc_parse_push(p, BC_INST_SCALE_FUNC); 629 630 bc_lex_next(&p->l); 631 } 632 633 /** 634 * Parses and increment or decrement operator. This is a bit complex. 635 * @param p The parser. 636 * @param prev An out parameter; the previous instruction pointer. 637 * @param can_assign An out parameter; whether the expression can be assigned 638 * to. 639 * @param nexs An in/out parameter; the number of expressions in the 640 * parse tree that are not used. 641 * @param flags The expression parsing flags for parsing a scale() arg. 642 */ 643 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign, 644 size_t *nexs, uint8_t flags) 645 { 646 BcLexType type; 647 uchar inst; 648 BcInst etype = *prev; 649 BcLexType last = p->l.last; 650 651 assert(prev != NULL && can_assign != NULL); 652 653 // If we can't assign to the previous token, then we have an error. 654 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC || 655 last == BC_LEX_RPAREN)) 656 { 657 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 658 } 659 660 // Is the previous instruction for a variable? 661 if (BC_PARSE_INST_VAR(etype)) { 662 663 // If so, this is a postfix operator. 664 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 665 666 // Only postfix uses BC_INST_INC and BC_INST_DEC. 667 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC); 668 bc_parse_push(p, inst); 669 bc_lex_next(&p->l); 670 *can_assign = false; 671 } 672 else { 673 674 // This is a prefix operator. In that case, we just convert it to 675 // an assignment instruction. 676 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC); 677 678 bc_lex_next(&p->l); 679 type = p->l.t; 680 681 // Because we parse the next part of the expression 682 // right here, we need to increment this. 683 *nexs = *nexs + 1; 684 685 // Is the next token a normal identifier? 686 if (type == BC_LEX_NAME) { 687 688 // Parse the name. 689 uint8_t flags2 = flags & ~BC_PARSE_ARRAY; 690 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL); 691 } 692 // Is the next token a global? 693 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) { 694 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST); 695 bc_lex_next(&p->l); 696 } 697 // Is the next token specifically scale, which needs special treatment? 698 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) { 699 700 bc_lex_next(&p->l); 701 702 // Check that scale() was not used. 703 if (BC_ERR(p->l.t == BC_LEX_LPAREN)) 704 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 705 else bc_parse_push(p, BC_INST_SCALE); 706 } 707 // Now we know we have an error. 708 else bc_parse_err(p, BC_ERR_PARSE_TOKEN); 709 710 *can_assign = false; 711 712 bc_parse_push(p, BC_INST_ONE); 713 bc_parse_push(p, inst); 714 } 715 } 716 717 /** 718 * Parses the minus operator. This needs special treatment because it is either 719 * subtract or negation. 720 * @param p The parser. 721 * @param prev An in/out parameter; the previous instruction. 722 * @param ops_bgn The size of the operator stack. 723 * @param rparen True if the last token was a right paren. 724 * @param binlast True if the last token was a binary operator. 725 * @param nexprs An in/out parameter; the number of unused expressions. 726 */ 727 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn, 728 bool rparen, bool binlast, size_t *nexprs) 729 { 730 BcLexType type; 731 732 bc_lex_next(&p->l); 733 734 // Figure out if it's a minus or a negation. 735 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG; 736 *prev = BC_PARSE_TOKEN_INST(type); 737 738 // We can just push onto the op stack because this is the largest 739 // precedence operator that gets pushed. Inc/dec does not. 740 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type); 741 else bc_parse_operator(p, type, ops_bgn, nexprs); 742 } 743 744 /** 745 * Parses a string. 746 * @param p The parser. 747 * @param inst The instruction corresponding to how the string was found and 748 * how it should be printed. 749 */ 750 static void bc_parse_str(BcParse *p, BcInst inst) { 751 bc_parse_addString(p); 752 bc_parse_push(p, inst); 753 bc_lex_next(&p->l); 754 } 755 756 /** 757 * Parses a print statement. 758 * @param p The parser. 759 */ 760 static void bc_parse_print(BcParse *p, BcLexType type) { 761 762 BcLexType t; 763 bool comma = false; 764 BcInst inst = type == BC_LEX_KW_STREAM ? 765 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP; 766 767 bc_lex_next(&p->l); 768 769 t = p->l.t; 770 771 // A print or stream statement has to have *something*. 772 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT); 773 774 do { 775 776 // If the token is a string, then print it with escapes. 777 // BC_INST_PRINT_POP plays that role for bc. 778 if (t == BC_LEX_STR) bc_parse_str(p, inst); 779 else { 780 // We have an actual number; parse and add a print instruction. 781 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print); 782 bc_parse_push(p, inst); 783 } 784 785 // Is the next token a comma? 786 comma = (p->l.t == BC_LEX_COMMA); 787 788 // Get the next token if we have a comma. 789 if (comma) bc_lex_next(&p->l); 790 else { 791 792 // If we don't have a comma, the statement needs to end. 793 if (!bc_parse_isDelimiter(p)) 794 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 795 else break; 796 } 797 798 t = p->l.t; 799 800 } while (true); 801 802 // If we have a comma but no token, that's bad. 803 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 804 } 805 806 /** 807 * Parses a return statement. 808 * @param p The parser. 809 */ 810 static void bc_parse_return(BcParse *p) { 811 812 BcLexType t; 813 bool paren; 814 uchar inst = BC_INST_RET0; 815 816 // If we are not in a function, that's an error. 817 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 818 819 // If we are in a void function, make sure to return void. 820 if (p->func->voidfn) inst = BC_INST_RET_VOID; 821 822 bc_lex_next(&p->l); 823 824 t = p->l.t; 825 paren = (t == BC_LEX_LPAREN); 826 827 // An empty return statement just needs to push the selected instruction. 828 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst); 829 else { 830 831 BcParseStatus s; 832 833 // Need to parse the expression whose value will be returned. 834 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr); 835 836 // If the expression was empty, just push the selected instruction. 837 if (s == BC_PARSE_STATUS_EMPTY_EXPR) { 838 bc_parse_push(p, inst); 839 bc_lex_next(&p->l); 840 } 841 842 // POSIX requires parentheses. 843 if (!paren || p->l.last != BC_LEX_RPAREN) { 844 bc_parse_err(p, BC_ERR_POSIX_RET); 845 } 846 847 // Void functions require an empty expression. 848 if (BC_ERR(p->func->voidfn)) { 849 if (s != BC_PARSE_STATUS_EMPTY_EXPR) 850 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name); 851 } 852 // If we got here, we want to be sure to end the function with a real 853 // return instruction, just in case. 854 else bc_parse_push(p, BC_INST_RET); 855 } 856 } 857 858 /** 859 * Clears flags that indicate the end of an if statement and its block and sets 860 * the jump location. 861 * @param p The parser. 862 */ 863 static void bc_parse_noElse(BcParse *p) { 864 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 865 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END)); 866 bc_parse_setLabel(p); 867 } 868 869 /** 870 * Ends (finishes parsing) the body of a control statement or a function. 871 * @param p The parser. 872 * @param brace True if the body was ended by a brace, false otherwise. 873 */ 874 static void bc_parse_endBody(BcParse *p, bool brace) { 875 876 bool has_brace, new_else = false; 877 878 // We cannot be ending a body if there are no bodies to end. 879 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 880 881 if (brace) { 882 883 // The brace was already gotten; make sure that the caller did not lie. 884 // We check for the requirement of braces later. 885 assert(p->l.t == BC_LEX_RBRACE); 886 887 bc_lex_next(&p->l); 888 889 // If the next token is not a delimiter, that is a problem. 890 if (BC_ERR(!bc_parse_isDelimiter(p))) 891 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 892 } 893 894 // Do we have a brace flag? 895 has_brace = (BC_PARSE_BRACE(p) != 0); 896 897 do { 898 size_t len = p->flags.len; 899 bool loop; 900 901 // If we have a brace flag but not a brace, that's a problem. 902 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 903 904 // Are we inside a loop? 905 loop = (BC_PARSE_LOOP_INNER(p) != 0); 906 907 // If we are ending a loop or an else... 908 if (loop || BC_PARSE_ELSE(p)) { 909 910 // Loops have condition labels that we have to take care of as well. 911 if (loop) { 912 913 size_t *label = bc_vec_top(&p->conds); 914 915 bc_parse_push(p, BC_INST_JUMP); 916 bc_parse_pushIndex(p, *label); 917 918 bc_vec_pop(&p->conds); 919 } 920 921 bc_parse_setLabel(p); 922 bc_vec_pop(&p->flags); 923 } 924 // If we are ending a function... 925 else if (BC_PARSE_FUNC_INNER(p)) { 926 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0); 927 bc_parse_push(p, inst); 928 bc_parse_updateFunc(p, BC_PROG_MAIN); 929 bc_vec_pop(&p->flags); 930 } 931 // If we have a brace flag and not an if statement, we can pop the top 932 // of the flags stack because they have been taken care of above. 933 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags); 934 935 // This needs to be last to parse nested if's properly. 936 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) { 937 938 // Eat newlines. 939 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 940 941 // *Now* we can pop the flags. 942 bc_vec_pop(&p->flags); 943 944 // If we are allowed non-POSIX stuff... 945 if (!BC_S) { 946 947 // Have we found yet another dangling else? 948 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END; 949 new_else = (p->l.t == BC_LEX_KW_ELSE); 950 951 // Parse the else or end the if statement body. 952 if (new_else) bc_parse_else(p); 953 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace)) 954 bc_parse_noElse(p); 955 } 956 // POSIX requires us to do the bare minimum only. 957 else bc_parse_noElse(p); 958 } 959 960 // If these are both true, we have "used" the braces that we found. 961 if (brace && has_brace) brace = false; 962 963 // This condition was perhaps the hardest single part of the parser. If the 964 // flags stack does not have enough, we should stop. If we have a new else 965 // statement, we should stop. If we do have the end of an if statement and 966 // we have eaten the brace, we should stop. If we do have a brace flag, we 967 // should stop. 968 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) && 969 !(has_brace = (BC_PARSE_BRACE(p) != 0))); 970 971 // If we have a brace, yet no body for it, that's a problem. 972 if (BC_ERR(p->flags.len == 1 && brace)) 973 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 974 else if (brace && BC_PARSE_BRACE(p)) { 975 976 // If we make it here, we have a brace and a flag for it. 977 uint16_t flags = BC_PARSE_TOP_FLAG(p); 978 979 // This condition ensure that the *last* body is correctly finished by 980 // popping its flags. 981 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) && 982 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) && 983 !(flags & (BC_PARSE_FLAG_IF_END))) 984 { 985 bc_vec_pop(&p->flags); 986 } 987 } 988 } 989 990 /** 991 * Starts the body of a control statement or function. 992 * @param p The parser. 993 * @param flags The current flags (will be edited). 994 */ 995 static void bc_parse_startBody(BcParse *p, uint16_t flags) { 996 assert(flags); 997 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP)); 998 flags |= BC_PARSE_FLAG_BODY; 999 bc_vec_push(&p->flags, &flags); 1000 } 1001 1002 void bc_parse_endif(BcParse *p) { 1003 1004 size_t i; 1005 bool good; 1006 1007 // Not a problem if this is true. 1008 if (BC_NO_ERR(!BC_PARSE_NO_EXEC(p))) return; 1009 1010 good = true; 1011 1012 // Find an instance of a body that needs closing, i.e., a statement that did 1013 // not have a right brace when it should have. 1014 for (i = 0; good && i < p->flags.len; ++i) { 1015 uint16_t flag = *((uint16_t*) bc_vec_item(&p->flags, i)); 1016 good = ((flag & BC_PARSE_FLAG_BRACE) != BC_PARSE_FLAG_BRACE); 1017 } 1018 1019 // If we did not find such an instance... 1020 if (good) { 1021 1022 // We set this to restore it later. We don't want the parser thinking 1023 // that we are on stdin for this one because it will want more. 1024 bool is_stdin = vm.is_stdin; 1025 1026 vm.is_stdin = false; 1027 1028 // End all of the if statements and loops. 1029 while (p->flags.len > 1 || BC_PARSE_IF_END(p)) { 1030 if (BC_PARSE_IF_END(p)) bc_parse_noElse(p); 1031 if (p->flags.len > 1) bc_parse_endBody(p, false); 1032 } 1033 1034 vm.is_stdin = is_stdin; 1035 } 1036 // If we reach here, a block was not properly closed, and we should error. 1037 else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK); 1038 } 1039 1040 /** 1041 * Parses an if statement. 1042 * @param p The parser. 1043 */ 1044 static void bc_parse_if(BcParse *p) { 1045 1046 // We are allowed relational operators, and we must have a value. 1047 size_t idx; 1048 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1049 1050 // Get the left paren and barf if necessary. 1051 bc_lex_next(&p->l); 1052 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1053 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1054 1055 // Parse the condition. 1056 bc_lex_next(&p->l); 1057 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1058 1059 // Must have a right paren. 1060 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1061 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1062 1063 bc_lex_next(&p->l); 1064 1065 // Insert the conditional jump instruction. 1066 bc_parse_push(p, BC_INST_JUMP_ZERO); 1067 1068 idx = p->func->labels.len; 1069 1070 // Push the index for the instruction and create an exit label for an else 1071 // statement. 1072 bc_parse_pushIndex(p, idx); 1073 bc_parse_createExitLabel(p, idx, false); 1074 1075 bc_parse_startBody(p, BC_PARSE_FLAG_IF); 1076 } 1077 1078 /** 1079 * Parses an else statement. 1080 * @param p The parser. 1081 */ 1082 static void bc_parse_else(BcParse *p) { 1083 1084 size_t idx = p->func->labels.len; 1085 1086 // We must be at the end of an if statement. 1087 if (BC_ERR(!BC_PARSE_IF_END(p))) 1088 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1089 1090 // Push an unconditional jump to make bc jump over the else statement if it 1091 // executed the original if statement. 1092 bc_parse_push(p, BC_INST_JUMP); 1093 bc_parse_pushIndex(p, idx); 1094 1095 // Clear the else stuff. Yes, that function is misnamed for its use here, 1096 // but deal with it. 1097 bc_parse_noElse(p); 1098 1099 // Create the exit label and parse the body. 1100 bc_parse_createExitLabel(p, idx, false); 1101 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE); 1102 1103 bc_lex_next(&p->l); 1104 } 1105 1106 /** 1107 * Parse a while loop. 1108 * @param p The parser. 1109 */ 1110 static void bc_parse_while(BcParse *p) { 1111 1112 // We are allowed relational operators, and we must have a value. 1113 size_t idx; 1114 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1115 1116 // Get the left paren and barf if necessary. 1117 bc_lex_next(&p->l); 1118 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1119 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1120 bc_lex_next(&p->l); 1121 1122 // Create the labels. Loops need both. 1123 bc_parse_createCondLabel(p, p->func->labels.len); 1124 idx = p->func->labels.len; 1125 bc_parse_createExitLabel(p, idx, true); 1126 1127 // Parse the actual condition and barf on non-right paren. 1128 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1129 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1130 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1131 bc_lex_next(&p->l); 1132 1133 // Now we can push the conditional jump and start the body. 1134 bc_parse_push(p, BC_INST_JUMP_ZERO); 1135 bc_parse_pushIndex(p, idx); 1136 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1137 } 1138 1139 /** 1140 * Parse a for loop. 1141 * @param p The parser. 1142 */ 1143 static void bc_parse_for(BcParse *p) { 1144 1145 size_t cond_idx, exit_idx, body_idx, update_idx; 1146 1147 // Barf on the missing left paren. 1148 bc_lex_next(&p->l); 1149 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1150 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1151 bc_lex_next(&p->l); 1152 1153 // The first statement can be empty, but if it is, check for error in POSIX 1154 // mode. Otherwise, parse it. 1155 if (p->l.t != BC_LEX_SCOLON) 1156 bc_parse_expr_status(p, 0, bc_parse_next_for); 1157 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1158 1159 // Must have a semicolon. 1160 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1161 bc_lex_next(&p->l); 1162 1163 // These are indices for labels. There are so many of them because the end 1164 // of the loop must unconditionally jump to the update code. Then the update 1165 // code must unconditionally jump to the condition code. Then the condition 1166 // code must *conditionally* jump to the exit. 1167 cond_idx = p->func->labels.len; 1168 update_idx = cond_idx + 1; 1169 body_idx = update_idx + 1; 1170 exit_idx = body_idx + 1; 1171 1172 // This creates the condition label. 1173 bc_parse_createLabel(p, p->func->code.len); 1174 1175 // Parse an expression if it exists. 1176 if (p->l.t != BC_LEX_SCOLON) { 1177 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1178 bc_parse_expr_status(p, flags, bc_parse_next_for); 1179 } 1180 else { 1181 1182 // Set this for the next call to bc_parse_number because an empty 1183 // condition means that it is an infinite loop, so the condition must be 1184 // non-zero. This is safe to set because the current token is a 1185 // semicolon, which has no string requirement. 1186 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one); 1187 bc_parse_number(p); 1188 1189 // An empty condition makes POSIX mad. 1190 bc_parse_err(p, BC_ERR_POSIX_FOR); 1191 } 1192 1193 // Must have a semicolon. 1194 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) 1195 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1196 bc_lex_next(&p->l); 1197 1198 // Now we can set up the conditional jump to the exit and an unconditional 1199 // jump to the body right after. The unconditional jump to the body is 1200 // because there is update code coming right after the condition, so we need 1201 // to skip it to get to the body. 1202 bc_parse_push(p, BC_INST_JUMP_ZERO); 1203 bc_parse_pushIndex(p, exit_idx); 1204 bc_parse_push(p, BC_INST_JUMP); 1205 bc_parse_pushIndex(p, body_idx); 1206 1207 // Now create the label for the update code. 1208 bc_parse_createCondLabel(p, update_idx); 1209 1210 // Parse if not empty, and if it is, let POSIX yell if necessary. 1211 if (p->l.t != BC_LEX_RPAREN) 1212 bc_parse_expr_status(p, 0, bc_parse_next_rel); 1213 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1214 1215 // Must have a right paren. 1216 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1217 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1218 1219 // Set up a jump to the condition right after the update code. 1220 bc_parse_push(p, BC_INST_JUMP); 1221 bc_parse_pushIndex(p, cond_idx); 1222 bc_parse_createLabel(p, p->func->code.len); 1223 1224 // Create an exit label for the body and start the body. 1225 bc_parse_createExitLabel(p, exit_idx, true); 1226 bc_lex_next(&p->l); 1227 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1228 } 1229 1230 /** 1231 * Parse a statement or token that indicates a loop exit. This includes an 1232 * actual loop exit, the break keyword, or the continue keyword. 1233 * @param p The parser. 1234 * @param type The type of exit. 1235 */ 1236 static void bc_parse_loopExit(BcParse *p, BcLexType type) { 1237 1238 size_t i; 1239 BcInstPtr *ip; 1240 1241 // Must have a loop. If we don't, that's an error. 1242 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1243 1244 // If we have a break statement... 1245 if (type == BC_LEX_KW_BREAK) { 1246 1247 // If there are no exits, something went wrong somewhere. 1248 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1249 1250 // Get the exit. 1251 i = p->exits.len - 1; 1252 ip = bc_vec_item(&p->exits, i); 1253 1254 // The condition !ip->func is true if the exit is not for a loop, so we 1255 // need to find the first actual loop exit. 1256 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--); 1257 1258 // Make sure everything is hunky dory. 1259 assert(ip != NULL && (i < p->exits.len || ip->func)); 1260 1261 // Set the index for the exit. 1262 i = ip->idx; 1263 } 1264 // If we have a continue statement or just the loop end, jump to the 1265 // condition (or update for a foor loop). 1266 else i = *((size_t*) bc_vec_top(&p->conds)); 1267 1268 // Add the unconditional jump. 1269 bc_parse_push(p, BC_INST_JUMP); 1270 bc_parse_pushIndex(p, i); 1271 1272 bc_lex_next(&p->l); 1273 } 1274 1275 /** 1276 * Parse a function (header). 1277 * @param p The parser. 1278 */ 1279 static void bc_parse_func(BcParse *p) { 1280 1281 bool comma = false, voidfn; 1282 uint16_t flags; 1283 size_t idx; 1284 1285 bc_lex_next(&p->l); 1286 1287 // Must have a name. 1288 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1289 1290 // If the name is "void", and POSIX is not on, mark as void. 1291 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME && 1292 !strcmp(p->l.str.v, "void")); 1293 1294 // We can safely do this because the expected token should not overwrite the 1295 // function name. 1296 bc_lex_next(&p->l); 1297 1298 // If we *don't* have another name, then void is the name of the function. 1299 voidfn = (voidfn && p->l.t == BC_LEX_NAME); 1300 1301 // With a void function, allow POSIX to complain and get a new token. 1302 if (voidfn) { 1303 1304 bc_parse_err(p, BC_ERR_POSIX_VOID); 1305 1306 // We can safely do this because the expected token should not overwrite 1307 // the function name. 1308 bc_lex_next(&p->l); 1309 } 1310 1311 // Must have a left paren. 1312 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1313 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1314 1315 // Make sure the functions map and vector are synchronized. 1316 assert(p->prog->fns.len == p->prog->fn_map.len); 1317 1318 // Must lock signals because vectors are changed, and the vector functions 1319 // expect signals to be locked. 1320 BC_SIG_LOCK; 1321 1322 // Insert the function by name into the map and vector. 1323 idx = bc_program_insertFunc(p->prog, p->l.str.v); 1324 1325 BC_SIG_UNLOCK; 1326 1327 // Make sure the insert worked. 1328 assert(idx); 1329 1330 // Update the function pointer and stuff in the parser and set its void. 1331 bc_parse_updateFunc(p, idx); 1332 p->func->voidfn = voidfn; 1333 1334 bc_lex_next(&p->l); 1335 1336 // While we do not have a right paren, we are still parsing arguments. 1337 while (p->l.t != BC_LEX_RPAREN) { 1338 1339 BcType t = BC_TYPE_VAR; 1340 1341 // If we have an asterisk, we are parsing a reference argument. 1342 if (p->l.t == BC_LEX_OP_MULTIPLY) { 1343 1344 t = BC_TYPE_REF; 1345 bc_lex_next(&p->l); 1346 1347 // Let POSIX complain if necessary. 1348 bc_parse_err(p, BC_ERR_POSIX_REF); 1349 } 1350 1351 // If we don't have a name, the argument will not have a name. Barf. 1352 if (BC_ERR(p->l.t != BC_LEX_NAME)) 1353 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1354 1355 // Increment the number of parameters. 1356 p->func->nparams += 1; 1357 1358 // Copy the string in the lexer so that we can use the lexer again. 1359 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v); 1360 1361 bc_lex_next(&p->l); 1362 1363 // We are parsing an array parameter if this is true. 1364 if (p->l.t == BC_LEX_LBRACKET) { 1365 1366 // Set the array type, unless we are already parsing a reference. 1367 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY; 1368 1369 bc_lex_next(&p->l); 1370 1371 // The brackets *must* be empty. 1372 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1373 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1374 1375 bc_lex_next(&p->l); 1376 } 1377 // If we did *not* get a bracket, but we are expecting a reference, we 1378 // have a problem. 1379 else if (BC_ERR(t == BC_TYPE_REF)) 1380 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v); 1381 1382 // Test for comma and get the next token if it exists. 1383 comma = (p->l.t == BC_LEX_COMMA); 1384 if (comma) bc_lex_next(&p->l); 1385 1386 // Insert the parameter into the function. 1387 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1388 } 1389 1390 // If we have a comma, but no parameter, barf. 1391 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1392 1393 // Start the body. 1394 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER; 1395 bc_parse_startBody(p, flags); 1396 1397 bc_lex_next(&p->l); 1398 1399 // POSIX requires that a brace be on the same line as the function header. 1400 // If we don't have a brace, let POSIX throw an error. 1401 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE); 1402 } 1403 1404 /** 1405 * Parse an auto list. 1406 * @param p The parser. 1407 */ 1408 static void bc_parse_auto(BcParse *p) { 1409 1410 bool comma, one; 1411 1412 // Error if the auto keyword appeared in the wrong place. 1413 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1414 bc_lex_next(&p->l); 1415 1416 p->auto_part = comma = false; 1417 1418 // We need at least one variable or array. 1419 one = (p->l.t == BC_LEX_NAME); 1420 1421 // While we have a variable or array. 1422 while (p->l.t == BC_LEX_NAME) { 1423 1424 BcType t; 1425 1426 // Copy the name from the lexer, so we can use it again. 1427 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v); 1428 1429 bc_lex_next(&p->l); 1430 1431 // If we are parsing an array... 1432 if (p->l.t == BC_LEX_LBRACKET) { 1433 1434 t = BC_TYPE_ARRAY; 1435 1436 bc_lex_next(&p->l); 1437 1438 // The brackets *must* be empty. 1439 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1440 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1441 1442 bc_lex_next(&p->l); 1443 } 1444 else t = BC_TYPE_VAR; 1445 1446 // Test for comma and get the next token if it exists. 1447 comma = (p->l.t == BC_LEX_COMMA); 1448 if (comma) bc_lex_next(&p->l); 1449 1450 // Insert the auto into the function. 1451 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1452 } 1453 1454 // If we have a comma, but no auto, barf. 1455 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1456 1457 // If we don't have any variables or arrays, barf. 1458 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO); 1459 1460 // The auto statement should be all that's in the statement. 1461 if (BC_ERR(!bc_parse_isDelimiter(p))) 1462 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1463 } 1464 1465 /** 1466 * Parses a body. 1467 * @param p The parser. 1468 * @param brace True if a brace was encountered, false otherwise. 1469 */ 1470 static void bc_parse_body(BcParse *p, bool brace) { 1471 1472 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 1473 1474 assert(flag_ptr != NULL); 1475 assert(p->flags.len >= 2); 1476 1477 // The body flag is for when we expect a body. We got a body, so clear the 1478 // flag. 1479 *flag_ptr &= ~(BC_PARSE_FLAG_BODY); 1480 1481 // If we are inside a function, that means we just barely entered it, and 1482 // we can expect an auto list. 1483 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) { 1484 1485 // We *must* have a brace in this case. 1486 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1487 1488 p->auto_part = (p->l.t != BC_LEX_KW_AUTO); 1489 1490 if (!p->auto_part) { 1491 1492 // Make sure this is true to not get a parse error. 1493 p->auto_part = true; 1494 1495 // Since we already have the auto keyword, parse. 1496 bc_parse_auto(p); 1497 } 1498 1499 // Eat a newline. 1500 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 1501 } 1502 else { 1503 1504 // This is the easy part. 1505 size_t len = p->flags.len; 1506 1507 assert(*flag_ptr); 1508 1509 // Parse a statement. 1510 bc_parse_stmt(p); 1511 1512 // This is a very important condition to get right. If there is no 1513 // brace, and no body flag, and the flags len hasn't shrunk, then we 1514 // have a body that was not delimited by braces, so we need to end it 1515 // now, after just one statement. 1516 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len) 1517 bc_parse_endBody(p, false); 1518 } 1519 } 1520 1521 /** 1522 * Parses a statement. This is the entry point for just about everything, except 1523 * function definitions. 1524 * @param p The parser. 1525 */ 1526 static void bc_parse_stmt(BcParse *p) { 1527 1528 size_t len; 1529 uint16_t flags; 1530 BcLexType type = p->l.t; 1531 1532 // Eat newline. 1533 if (type == BC_LEX_NLINE) { 1534 bc_lex_next(&p->l); 1535 return; 1536 } 1537 1538 // Eat auto list. 1539 if (type == BC_LEX_KW_AUTO) { 1540 bc_parse_auto(p); 1541 return; 1542 } 1543 1544 // If we reach this point, no auto list is allowed. 1545 p->auto_part = false; 1546 1547 // Everything but an else needs to be taken care of here, but else is 1548 // special. 1549 if (type != BC_LEX_KW_ELSE) { 1550 1551 // After an if, no else found. 1552 if (BC_PARSE_IF_END(p)) { 1553 1554 // Clear the expectation for else, end body, and return. Returning 1555 // gives us a clean slate for parsing again. 1556 bc_parse_noElse(p); 1557 if (p->flags.len > 1 && !BC_PARSE_BRACE(p)) 1558 bc_parse_endBody(p, false); 1559 return; 1560 } 1561 // With a left brace, we are parsing a body. 1562 else if (type == BC_LEX_LBRACE) { 1563 1564 // We need to start a body if we are not expecting one yet. 1565 if (!BC_PARSE_BODY(p)) { 1566 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE); 1567 bc_lex_next(&p->l); 1568 } 1569 // If we *are* expecting a body, that body should get a brace. This 1570 // takes care of braces being on a different line than if and loop 1571 // headers. 1572 else { 1573 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE; 1574 bc_lex_next(&p->l); 1575 bc_parse_body(p, true); 1576 } 1577 1578 // If we have reached this point, we need to return for a clean 1579 // slate. 1580 return; 1581 } 1582 // This happens when we are expecting a body and get a single statement, 1583 // i.e., a body with no braces surrounding it. Returns after for a clean 1584 // slate. 1585 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) { 1586 bc_parse_body(p, false); 1587 return; 1588 } 1589 } 1590 1591 len = p->flags.len; 1592 flags = BC_PARSE_TOP_FLAG(p); 1593 1594 switch (type) { 1595 1596 // All of these are valid for expressions. 1597 case BC_LEX_OP_INC: 1598 case BC_LEX_OP_DEC: 1599 case BC_LEX_OP_MINUS: 1600 case BC_LEX_OP_BOOL_NOT: 1601 case BC_LEX_LPAREN: 1602 case BC_LEX_NAME: 1603 case BC_LEX_NUMBER: 1604 case BC_LEX_KW_IBASE: 1605 case BC_LEX_KW_LAST: 1606 case BC_LEX_KW_LENGTH: 1607 case BC_LEX_KW_OBASE: 1608 case BC_LEX_KW_SCALE: 1609 #if BC_ENABLE_EXTRA_MATH 1610 case BC_LEX_KW_SEED: 1611 #endif // BC_ENABLE_EXTRA_MATH 1612 case BC_LEX_KW_SQRT: 1613 case BC_LEX_KW_ABS: 1614 #if BC_ENABLE_EXTRA_MATH 1615 case BC_LEX_KW_IRAND: 1616 #endif // BC_ENABLE_EXTRA_MATH 1617 case BC_LEX_KW_ASCIIFY: 1618 case BC_LEX_KW_MODEXP: 1619 case BC_LEX_KW_DIVMOD: 1620 case BC_LEX_KW_READ: 1621 #if BC_ENABLE_EXTRA_MATH 1622 case BC_LEX_KW_RAND: 1623 #endif // BC_ENABLE_EXTRA_MATH 1624 case BC_LEX_KW_MAXIBASE: 1625 case BC_LEX_KW_MAXOBASE: 1626 case BC_LEX_KW_MAXSCALE: 1627 #if BC_ENABLE_EXTRA_MATH 1628 case BC_LEX_KW_MAXRAND: 1629 #endif // BC_ENABLE_EXTRA_MATH 1630 case BC_LEX_KW_LINE_LENGTH: 1631 case BC_LEX_KW_GLOBAL_STACKS: 1632 case BC_LEX_KW_LEADING_ZERO: 1633 { 1634 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr); 1635 break; 1636 } 1637 1638 case BC_LEX_KW_ELSE: 1639 { 1640 bc_parse_else(p); 1641 break; 1642 } 1643 1644 // Just eat. 1645 case BC_LEX_SCOLON: 1646 { 1647 // Do nothing. 1648 break; 1649 } 1650 1651 case BC_LEX_RBRACE: 1652 { 1653 bc_parse_endBody(p, true); 1654 break; 1655 } 1656 1657 case BC_LEX_STR: 1658 { 1659 bc_parse_str(p, BC_INST_PRINT_STR); 1660 break; 1661 } 1662 1663 case BC_LEX_KW_BREAK: 1664 case BC_LEX_KW_CONTINUE: 1665 { 1666 bc_parse_loopExit(p, p->l.t); 1667 break; 1668 } 1669 1670 case BC_LEX_KW_FOR: 1671 { 1672 bc_parse_for(p); 1673 break; 1674 } 1675 1676 case BC_LEX_KW_HALT: 1677 { 1678 bc_parse_push(p, BC_INST_HALT); 1679 bc_lex_next(&p->l); 1680 break; 1681 } 1682 1683 case BC_LEX_KW_IF: 1684 { 1685 bc_parse_if(p); 1686 break; 1687 } 1688 1689 case BC_LEX_KW_LIMITS: 1690 { 1691 // `limits` is a compile-time command, so execute it right away. 1692 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT); 1693 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS); 1694 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW); 1695 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX); 1696 bc_vm_printf("\n"); 1697 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE); 1698 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM); 1699 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE); 1700 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING); 1701 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME); 1702 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM); 1703 #if BC_ENABLE_EXTRA_MATH 1704 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND); 1705 #endif // BC_ENABLE_EXTRA_MATH 1706 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP); 1707 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS); 1708 1709 bc_lex_next(&p->l); 1710 1711 break; 1712 } 1713 1714 case BC_LEX_KW_STREAM: 1715 case BC_LEX_KW_PRINT: 1716 { 1717 bc_parse_print(p, type); 1718 break; 1719 } 1720 1721 case BC_LEX_KW_QUIT: 1722 { 1723 // Quit is a compile-time command. We don't exit directly, so the vm 1724 // can clean up. 1725 vm.status = BC_STATUS_QUIT; 1726 BC_JMP; 1727 break; 1728 } 1729 1730 case BC_LEX_KW_RETURN: 1731 { 1732 bc_parse_return(p); 1733 break; 1734 } 1735 1736 case BC_LEX_KW_WHILE: 1737 { 1738 bc_parse_while(p); 1739 break; 1740 } 1741 1742 default: 1743 { 1744 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1745 } 1746 } 1747 1748 // If the flags did not change, we expect a delimiter. 1749 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) { 1750 if (BC_ERR(!bc_parse_isDelimiter(p))) 1751 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1752 } 1753 1754 // Make sure semicolons are eaten. 1755 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l); 1756 } 1757 1758 void bc_parse_parse(BcParse *p) { 1759 1760 assert(p); 1761 1762 BC_SETJMP(exit); 1763 1764 // We should not let an EOF get here unless some partial parse was not 1765 // completed, in which case, it's the user's fault. 1766 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF); 1767 1768 // Functions need special parsing. 1769 else if (p->l.t == BC_LEX_KW_DEFINE) { 1770 if (BC_ERR(BC_PARSE_NO_EXEC(p))) { 1771 bc_parse_endif(p); 1772 if (BC_ERR(BC_PARSE_NO_EXEC(p))) 1773 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1774 } 1775 bc_parse_func(p); 1776 } 1777 1778 // Otherwise, parse a normal statement. 1779 else bc_parse_stmt(p); 1780 1781 exit: 1782 1783 BC_SIG_MAYLOCK; 1784 1785 // We need to reset on error. 1786 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig))) 1787 bc_parse_reset(p); 1788 1789 BC_LONGJMP_CONT; 1790 } 1791 1792 /** 1793 * Parse an expression. This is the actual implementation of the Shunting-Yard 1794 * Algorithm. 1795 * @param p The parser. 1796 * @param flags The flags for what is valid in the expression. 1797 * @param next A set of tokens for what is valid *after* the expression. 1798 * @return A parse status. In some places, an empty expression is an 1799 * error, and sometimes, it is required. This allows this function 1800 * to tell the caller if the expression was empty and let the 1801 * caller handle it. 1802 */ 1803 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 1804 BcParseNext next) 1805 { 1806 BcInst prev = BC_INST_PRINT; 1807 uchar inst = BC_INST_INVALID; 1808 BcLexType top, t; 1809 size_t nexprs, ops_bgn; 1810 uint32_t i, nparens, nrelops; 1811 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign; 1812 1813 // One of these *must* be true. 1814 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL)); 1815 1816 // These are set very carefully. In fact, controlling the values of these 1817 // locals is the biggest part of making this work. ops_bgn especially is 1818 // important because it marks where the operator stack begins for *this* 1819 // invocation of this function. That's because bc_parse_expr_err() is 1820 // recursive (the Shunting-Yard Algorithm is most easily expressed 1821 // recursively when parsing subexpressions), and each invocation needs to 1822 // know where to stop. 1823 // 1824 // - nparens is the number of left parens without matches. 1825 // - nrelops is the number of relational operators that appear in the expr. 1826 // - nexprs is the number of unused expressions. 1827 // - rprn is a right paren encountered last. 1828 // - done means the expression has been fully parsed. 1829 // - get_token is true when a token is needed at the end of an iteration. 1830 // - assign is true when an assignment statement was parsed last. 1831 // - incdec is true when the previous operator was an inc or dec operator. 1832 // - can_assign is true when an assignemnt is valid. 1833 // - bin_last is true when the previous instruction was a binary operator. 1834 t = p->l.t; 1835 pfirst = (p->l.t == BC_LEX_LPAREN); 1836 nparens = nrelops = 0; 1837 nexprs = 0; 1838 ops_bgn = p->ops.len; 1839 rprn = done = get_token = assign = incdec = can_assign = false; 1840 bin_last = true; 1841 1842 // We want to eat newlines if newlines are not a valid ending token. 1843 // This is for spacing in things like for loop headers. 1844 if (!(flags & BC_PARSE_NOREAD)) { 1845 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l); 1846 } 1847 1848 // This is the Shunting-Yard algorithm loop. 1849 for (; !done && BC_PARSE_EXPR(t); t = p->l.t) 1850 { 1851 switch (t) { 1852 1853 case BC_LEX_OP_INC: 1854 case BC_LEX_OP_DEC: 1855 { 1856 // These operators can only be used with items that can be 1857 // assigned to. 1858 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1859 1860 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags); 1861 1862 rprn = get_token = bin_last = false; 1863 incdec = true; 1864 flags &= ~(BC_PARSE_ARRAY); 1865 1866 break; 1867 } 1868 1869 #if BC_ENABLE_EXTRA_MATH 1870 case BC_LEX_OP_TRUNC: 1871 { 1872 // The previous token must have been a leaf expression, or the 1873 // operator is in the wrong place. 1874 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn))) 1875 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1876 1877 // I can just add the instruction because 1878 // negative will already be taken care of. 1879 bc_parse_push(p, BC_INST_TRUNC); 1880 1881 rprn = can_assign = incdec = false; 1882 get_token = true; 1883 flags &= ~(BC_PARSE_ARRAY); 1884 1885 break; 1886 } 1887 #endif // BC_ENABLE_EXTRA_MATH 1888 1889 case BC_LEX_OP_MINUS: 1890 { 1891 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs); 1892 1893 rprn = get_token = can_assign = false; 1894 1895 // This is true if it was a binary operator last. 1896 bin_last = (prev == BC_INST_MINUS); 1897 if (bin_last) incdec = false; 1898 1899 flags &= ~(BC_PARSE_ARRAY); 1900 1901 break; 1902 } 1903 1904 // All of this group, including the fallthrough, is to parse binary 1905 // operators. 1906 case BC_LEX_OP_ASSIGN_POWER: 1907 case BC_LEX_OP_ASSIGN_MULTIPLY: 1908 case BC_LEX_OP_ASSIGN_DIVIDE: 1909 case BC_LEX_OP_ASSIGN_MODULUS: 1910 case BC_LEX_OP_ASSIGN_PLUS: 1911 case BC_LEX_OP_ASSIGN_MINUS: 1912 #if BC_ENABLE_EXTRA_MATH 1913 case BC_LEX_OP_ASSIGN_PLACES: 1914 case BC_LEX_OP_ASSIGN_LSHIFT: 1915 case BC_LEX_OP_ASSIGN_RSHIFT: 1916 #endif // BC_ENABLE_EXTRA_MATH 1917 case BC_LEX_OP_ASSIGN: 1918 { 1919 // We need to make sure the assignment is valid. 1920 if (!BC_PARSE_INST_VAR(prev)) 1921 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1922 } 1923 // Fallthrough. 1924 BC_FALLTHROUGH 1925 1926 case BC_LEX_OP_POWER: 1927 case BC_LEX_OP_MULTIPLY: 1928 case BC_LEX_OP_DIVIDE: 1929 case BC_LEX_OP_MODULUS: 1930 case BC_LEX_OP_PLUS: 1931 #if BC_ENABLE_EXTRA_MATH 1932 case BC_LEX_OP_PLACES: 1933 case BC_LEX_OP_LSHIFT: 1934 case BC_LEX_OP_RSHIFT: 1935 #endif // BC_ENABLE_EXTRA_MATH 1936 case BC_LEX_OP_REL_EQ: 1937 case BC_LEX_OP_REL_LE: 1938 case BC_LEX_OP_REL_GE: 1939 case BC_LEX_OP_REL_NE: 1940 case BC_LEX_OP_REL_LT: 1941 case BC_LEX_OP_REL_GT: 1942 case BC_LEX_OP_BOOL_NOT: 1943 case BC_LEX_OP_BOOL_OR: 1944 case BC_LEX_OP_BOOL_AND: 1945 { 1946 // This is true if the operator if the token is a prefix 1947 // operator. This is only for boolean not. 1948 if (BC_PARSE_OP_PREFIX(t)) { 1949 1950 // Prefix operators are only allowed after binary operators 1951 // or prefix operators. 1952 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last))) 1953 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1954 } 1955 // If we execute the else, that means we have a binary operator. 1956 // If the previous operator was a prefix or a binary operator, 1957 // then a binary operator is not allowed. 1958 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last)) 1959 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1960 1961 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT); 1962 prev = BC_PARSE_TOKEN_INST(t); 1963 1964 bc_parse_operator(p, t, ops_bgn, &nexprs); 1965 1966 rprn = incdec = can_assign = false; 1967 get_token = true; 1968 bin_last = !BC_PARSE_OP_PREFIX(t); 1969 flags &= ~(BC_PARSE_ARRAY); 1970 1971 break; 1972 } 1973 1974 case BC_LEX_LPAREN: 1975 { 1976 // A left paren is *not* allowed right after a leaf expr. 1977 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 1978 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1979 1980 nparens += 1; 1981 rprn = incdec = can_assign = false; 1982 get_token = true; 1983 1984 // Push the paren onto the operator stack. 1985 bc_vec_push(&p->ops, &t); 1986 1987 break; 1988 } 1989 1990 case BC_LEX_RPAREN: 1991 { 1992 // This needs to be a status. The error is handled in 1993 // bc_parse_expr_status(). 1994 if (BC_ERR(p->l.last == BC_LEX_LPAREN)) 1995 return BC_PARSE_STATUS_EMPTY_EXPR; 1996 1997 // The right paren must not come after a prefix or binary 1998 // operator. 1999 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev))) 2000 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2001 2002 // If there are no parens left, we are done, but we need another 2003 // token. 2004 if (!nparens) { 2005 done = true; 2006 get_token = false; 2007 break; 2008 } 2009 2010 nparens -= 1; 2011 rprn = true; 2012 get_token = bin_last = incdec = false; 2013 2014 bc_parse_rightParen(p, &nexprs); 2015 2016 break; 2017 } 2018 2019 case BC_LEX_STR: 2020 { 2021 // POSIX only allows strings alone. 2022 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING); 2023 2024 // A string is a leaf and cannot come right after a leaf. 2025 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2026 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2027 2028 bc_parse_addString(p); 2029 2030 get_token = true; 2031 bin_last = rprn = false; 2032 nexprs += 1; 2033 2034 break; 2035 } 2036 2037 case BC_LEX_NAME: 2038 { 2039 // A name is a leaf and cannot come right after a leaf. 2040 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2041 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2042 2043 get_token = bin_last = false; 2044 2045 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL); 2046 2047 rprn = (prev == BC_INST_CALL); 2048 nexprs += 1; 2049 flags &= ~(BC_PARSE_ARRAY); 2050 2051 break; 2052 } 2053 2054 case BC_LEX_NUMBER: 2055 { 2056 // A number is a leaf and cannot come right after a leaf. 2057 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2058 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2059 2060 // The number instruction is pushed in here. 2061 bc_parse_number(p); 2062 2063 nexprs += 1; 2064 prev = BC_INST_NUM; 2065 get_token = true; 2066 rprn = bin_last = can_assign = false; 2067 flags &= ~(BC_PARSE_ARRAY); 2068 2069 break; 2070 } 2071 2072 case BC_LEX_KW_IBASE: 2073 case BC_LEX_KW_LAST: 2074 case BC_LEX_KW_OBASE: 2075 #if BC_ENABLE_EXTRA_MATH 2076 case BC_LEX_KW_SEED: 2077 #endif // BC_ENABLE_EXTRA_MATH 2078 { 2079 // All of these are leaves and cannot come right after a leaf. 2080 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2081 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2082 2083 prev = t - BC_LEX_KW_LAST + BC_INST_LAST; 2084 bc_parse_push(p, prev); 2085 2086 get_token = can_assign = true; 2087 rprn = bin_last = false; 2088 nexprs += 1; 2089 flags &= ~(BC_PARSE_ARRAY); 2090 2091 break; 2092 } 2093 2094 case BC_LEX_KW_LENGTH: 2095 case BC_LEX_KW_SQRT: 2096 case BC_LEX_KW_ABS: 2097 #if BC_ENABLE_EXTRA_MATH 2098 case BC_LEX_KW_IRAND: 2099 #endif // BC_ENABLE_EXTRA_MATH 2100 case BC_LEX_KW_ASCIIFY: 2101 { 2102 // All of these are leaves and cannot come right after a leaf. 2103 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2104 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2105 2106 bc_parse_builtin(p, t, flags, &prev); 2107 2108 rprn = get_token = bin_last = incdec = can_assign = false; 2109 nexprs += 1; 2110 flags &= ~(BC_PARSE_ARRAY); 2111 2112 break; 2113 } 2114 2115 case BC_LEX_KW_READ: 2116 #if BC_ENABLE_EXTRA_MATH 2117 case BC_LEX_KW_RAND: 2118 #endif // BC_ENABLE_EXTRA_MATH 2119 case BC_LEX_KW_MAXIBASE: 2120 case BC_LEX_KW_MAXOBASE: 2121 case BC_LEX_KW_MAXSCALE: 2122 #if BC_ENABLE_EXTRA_MATH 2123 case BC_LEX_KW_MAXRAND: 2124 #endif // BC_ENABLE_EXTRA_MATH 2125 case BC_LEX_KW_LINE_LENGTH: 2126 case BC_LEX_KW_GLOBAL_STACKS: 2127 case BC_LEX_KW_LEADING_ZERO: 2128 { 2129 // All of these are leaves and cannot come right after a leaf. 2130 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2131 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2132 2133 // Error if we have read and it's not allowed. 2134 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD)) 2135 bc_parse_err(p, BC_ERR_EXEC_REC_READ); 2136 2137 prev = t - BC_LEX_KW_READ + BC_INST_READ; 2138 bc_parse_noArgBuiltin(p, prev); 2139 2140 rprn = get_token = bin_last = incdec = can_assign = false; 2141 nexprs += 1; 2142 flags &= ~(BC_PARSE_ARRAY); 2143 2144 break; 2145 } 2146 2147 case BC_LEX_KW_SCALE: 2148 { 2149 // This is a leaf and cannot come right after a leaf. 2150 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2151 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2152 2153 // Scale needs special work because it can be a variable *or* a 2154 // function. 2155 bc_parse_scale(p, &prev, &can_assign, flags); 2156 2157 rprn = get_token = bin_last = false; 2158 nexprs += 1; 2159 flags &= ~(BC_PARSE_ARRAY); 2160 2161 break; 2162 } 2163 2164 case BC_LEX_KW_MODEXP: 2165 case BC_LEX_KW_DIVMOD: 2166 { 2167 // This is a leaf and cannot come right after a leaf. 2168 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2169 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2170 2171 bc_parse_builtin3(p, t, flags, &prev); 2172 2173 rprn = get_token = bin_last = incdec = can_assign = false; 2174 nexprs += 1; 2175 flags &= ~(BC_PARSE_ARRAY); 2176 2177 break; 2178 } 2179 2180 default: 2181 { 2182 #ifndef NDEBUG 2183 // We should never get here, even in debug builds. 2184 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 2185 break; 2186 #endif // NDEBUG 2187 } 2188 } 2189 2190 if (get_token) bc_lex_next(&p->l); 2191 } 2192 2193 // Now that we have parsed the expression, we need to empty the operator 2194 // stack. 2195 while (p->ops.len > ops_bgn) { 2196 2197 top = BC_PARSE_TOP_OP(p); 2198 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN; 2199 2200 // There should not be *any* parens on the stack anymore. 2201 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN)) 2202 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2203 2204 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 2205 2206 // Adjust the number of unused expressions. 2207 nexprs -= !BC_PARSE_OP_PREFIX(top); 2208 bc_vec_pop(&p->ops); 2209 2210 incdec = false; 2211 } 2212 2213 // There must be only one expression at the top. 2214 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR); 2215 2216 // Check that the next token is correct. 2217 for (i = 0; i < next.len && t != next.tokens[i]; ++i); 2218 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p))) 2219 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2220 2221 // Check that POSIX would be happy with the number of relational operators. 2222 if (!(flags & BC_PARSE_REL) && nrelops) 2223 bc_parse_err(p, BC_ERR_POSIX_REL_POS); 2224 else if ((flags & BC_PARSE_REL) && nrelops > 1) 2225 bc_parse_err(p, BC_ERR_POSIX_MULTIREL); 2226 2227 // If this is true, then we might be in a situation where we don't print. 2228 // We would want to have the increment/decrement operator not make an extra 2229 // copy if it's not necessary. 2230 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) { 2231 2232 // We have the easy case if the last operator was an assignment 2233 // operator. 2234 if (assign) { 2235 inst = *((uchar*) bc_vec_top(&p->func->code)); 2236 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER); 2237 incdec = false; 2238 } 2239 // If we have an inc/dec operator and we are *not* printing, implement 2240 // the optimization to get rid of the extra copy. 2241 else if (incdec && !(flags & BC_PARSE_PRINT)) { 2242 inst = *((uchar*) bc_vec_top(&p->func->code)); 2243 incdec = (inst <= BC_INST_DEC); 2244 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC && 2245 inst != BC_INST_ASSIGN_PLUS); 2246 } 2247 2248 // This condition allows us to change the previous assignment 2249 // instruction (which does a copy) for a NO_VAL version, which does not. 2250 // This condition is set if either of the above if statements ends up 2251 // being true. 2252 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL && 2253 inst <= BC_INST_ASSIGN_NO_VAL) 2254 { 2255 // Pop the previous assignment instruction and push a new one. 2256 // Inc/dec needs the extra instruction because it is now a binary 2257 // operator and needs a second operand. 2258 bc_vec_pop(&p->func->code); 2259 if (incdec) bc_parse_push(p, BC_INST_ONE); 2260 bc_parse_push(p, inst); 2261 } 2262 } 2263 2264 // If we might have to print... 2265 if ((flags & BC_PARSE_PRINT)) { 2266 2267 // With a paren first or the last operator not being an assignment, we 2268 // *do* want to print. 2269 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT); 2270 } 2271 // We need to make sure to push a pop instruction for assignment statements 2272 // that will not print. The print will pop, but without it, we need to pop. 2273 else if (!(flags & BC_PARSE_NEEDVAL) && 2274 (inst < BC_INST_ASSIGN_POWER_NO_VAL || 2275 inst > BC_INST_ASSIGN_NO_VAL)) 2276 { 2277 bc_parse_push(p, BC_INST_POP); 2278 } 2279 2280 // We want to eat newlines if newlines are not a valid ending token. 2281 // This is for spacing in things like for loop headers. 2282 // 2283 // Yes, this is one case where I reuse a variable for a different purpose; 2284 // in this case, incdec being true now means that newlines are not valid. 2285 for (incdec = true, i = 0; i < next.len && incdec; ++i) 2286 incdec = (next.tokens[i] != BC_LEX_NLINE); 2287 if (incdec) { 2288 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 2289 } 2290 2291 return BC_PARSE_STATUS_SUCCESS; 2292 } 2293 2294 /** 2295 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets 2296 * an empty expression. 2297 * @param p The parser. 2298 * @param flags The flags for what is valid in the expression. 2299 * @param next A set of tokens for what is valid *after* the expression. 2300 */ 2301 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) { 2302 2303 BcParseStatus s = bc_parse_expr_err(p, flags, next); 2304 2305 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR)) 2306 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR); 2307 } 2308 2309 void bc_parse_expr(BcParse *p, uint8_t flags) { 2310 assert(p); 2311 bc_parse_expr_status(p, flags, bc_parse_next_read); 2312 } 2313 #endif // BC_ENABLED 2314