1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * The parser for bc. 33 * 34 */ 35 36 #if BC_ENABLED 37 38 #include <assert.h> 39 #include <stdbool.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include <setjmp.h> 44 45 #include <bc.h> 46 #include <num.h> 47 #include <vm.h> 48 49 // Before you embark on trying to understand this code, have you read the 50 // Development manual (manuals/development.md) and the comment in include/bc.h 51 // yet? No? Do that first. I'm serious. 52 // 53 // The reason is because this file holds the most sensitive and finicky code in 54 // the entire codebase. Even getting history to work on Windows was nothing 55 // compared to this. This is where dreams go to die, where dragons live, and 56 // from which Ken Thompson himself would flee. 57 58 static void bc_parse_else(BcParse *p); 59 static void bc_parse_stmt(BcParse *p); 60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 61 BcParseNext next); 62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next); 63 64 /** 65 * Returns true if an instruction could only have come from a "leaf" expression. 66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF(). 67 * @param t The instruction to test. 68 */ 69 static bool bc_parse_inst_isLeaf(BcInst t) { 70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) || 71 #if BC_ENABLE_EXTRA_MATH 72 t == BC_INST_TRUNC || 73 #endif // BC_ENABLE_EXTRA_MATH 74 t <= BC_INST_DEC; 75 } 76 77 /** 78 * Returns true if the *previous* token was a delimiter. A delimiter is anything 79 * that can legally end a statement. In bc's case, it could be a newline, a 80 * semicolon, and a brace in certain cases. 81 * @param p The parser. 82 */ 83 static bool bc_parse_isDelimiter(const BcParse *p) { 84 85 BcLexType t = p->l.t; 86 bool good; 87 88 // If it's an obvious delimiter, say so. 89 if (BC_PARSE_DELIMITER(t)) return true; 90 91 good = false; 92 93 // If the current token is a keyword, then...beware. That means that we need 94 // to check for a "dangling" else, where there was no brace-delimited block 95 // on the previous if. 96 if (t == BC_LEX_KW_ELSE) { 97 98 size_t i; 99 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE; 100 101 // As long as going up the stack is valid for a dangling else, keep on. 102 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) { 103 104 fptr = bc_vec_item_rev(&p->flags, i); 105 flags = *fptr; 106 107 // If we need a brace and don't have one, then we don't have a 108 // delimiter. 109 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE) 110 return false; 111 } 112 113 // Oh, and we had also better have an if statement somewhere. 114 good = ((flags & BC_PARSE_FLAG_IF) != 0); 115 } 116 else if (t == BC_LEX_RBRACE) { 117 118 size_t i; 119 120 // Since we have a brace, we need to just check if a brace was needed. 121 for (i = 0; !good && i < p->flags.len; ++i) { 122 uint16_t *fptr = bc_vec_item_rev(&p->flags, i); 123 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0); 124 } 125 } 126 127 return good; 128 } 129 130 /** 131 * Sets a previously defined exit label. What are labels? See the bc Parsing 132 * section of the Development manual (manuals/development.md). 133 * @param p The parser. 134 */ 135 static void bc_parse_setLabel(BcParse *p) { 136 137 BcFunc *func = p->func; 138 BcInstPtr *ip = bc_vec_top(&p->exits); 139 size_t *label; 140 141 assert(func == bc_vec_item(&p->prog->fns, p->fidx)); 142 143 // Set the preallocated label to the correct index. 144 label = bc_vec_item(&func->labels, ip->idx); 145 *label = func->code.len; 146 147 // Now, we don't need the exit label; it is done. 148 bc_vec_pop(&p->exits); 149 } 150 151 /** 152 * Creates a label and sets it to idx. If this is an exit label, then idx is 153 * actually invalid, but it doesn't matter because it will be fixed by 154 * bc_parse_setLabel() later. 155 * @param p The parser. 156 * @param idx The index of the label. 157 */ 158 static void bc_parse_createLabel(BcParse *p, size_t idx) { 159 bc_vec_push(&p->func->labels, &idx); 160 } 161 162 /** 163 * Creates a conditional label. Unlike an exit label, this label is set at 164 * creation time because it comes *before* the code that will target it. 165 * @param p The parser. 166 * @param idx The index of the label. 167 */ 168 static void bc_parse_createCondLabel(BcParse *p, size_t idx) { 169 bc_parse_createLabel(p, p->func->code.len); 170 bc_vec_push(&p->conds, &idx); 171 } 172 173 /* 174 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why 175 * create a label to be filled in later? Because exit labels are meant to be 176 * targeted by code that comes *before* the label. Since we have to parse that 177 * code first, and don't know how long it will be, we need to just make sure to 178 * reserve a slot to be filled in later when we know. 179 * 180 * By the way, this uses BcInstPtr because it was convenient. The field idx 181 * holds the index, and the field func holds the loop boolean. 182 * 183 * @param p The parser. 184 * @param idx The index of the label's position. 185 * @param loop True if the exit label is for a loop or not. 186 */ 187 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) { 188 189 BcInstPtr ip; 190 191 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx)); 192 193 ip.func = loop; 194 ip.idx = idx; 195 ip.len = 0; 196 197 bc_vec_push(&p->exits, &ip); 198 bc_parse_createLabel(p, SIZE_MAX); 199 } 200 201 /** 202 * Pops the correct operators off of the operator stack based on the current 203 * operator. This is because of the Shunting-Yard algorithm. Lower prec means 204 * higher precedence. 205 * @param p The parser. 206 * @param type The operator. 207 * @param start The previous start of the operator stack. For more 208 * information, see the bc Parsing section of the Development 209 * manual (manuals/development.md). 210 * @param nexprs A pointer to the current number of expressions that have not 211 * been consumed yet. This is an IN and OUT parameter. 212 */ 213 static void bc_parse_operator(BcParse *p, BcLexType type, 214 size_t start, size_t *nexprs) 215 { 216 BcLexType t; 217 uchar l, r = BC_PARSE_OP_PREC(type); 218 uchar left = BC_PARSE_OP_LEFT(type); 219 220 // While we haven't hit the stop point yet. 221 while (p->ops.len > start) { 222 223 // Get the top operator. 224 t = BC_PARSE_TOP_OP(p); 225 226 // If it's a right paren, we have reached the end of whatever expression 227 // this is no matter what. 228 if (t == BC_LEX_LPAREN) break; 229 230 // Break for precedence. Precedence operates differently on left and 231 // right associativity, by the way. A left associative operator that 232 // matches the current precedence should take priority, but a right 233 // associative operator should not. 234 l = BC_PARSE_OP_PREC(t); 235 if (l >= r && (l != r || !left)) break; 236 237 // Do the housekeeping. In particular, make sure to note that one 238 // expression was consumed. (Two were, but another was added.) 239 bc_parse_push(p, BC_PARSE_TOKEN_INST(t)); 240 bc_vec_pop(&p->ops); 241 *nexprs -= !BC_PARSE_OP_PREFIX(t); 242 } 243 244 bc_vec_push(&p->ops, &type); 245 } 246 247 /** 248 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on 249 * the operator stack. But before that, it needs to consume whatever operators 250 * there are until it hits a left paren. 251 * @param p The parser. 252 * @param nexprs A pointer to the current number of expressions that have not 253 * been consumed yet. This is an IN and OUT parameter. 254 */ 255 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) { 256 257 BcLexType top; 258 259 // Consume operators until a left paren. 260 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) { 261 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 262 bc_vec_pop(&p->ops); 263 *nexprs -= !BC_PARSE_OP_PREFIX(top); 264 } 265 266 // We need to pop the left paren as well. 267 bc_vec_pop(&p->ops); 268 269 // Oh, and we also want the next token. 270 bc_lex_next(&p->l); 271 } 272 273 /** 274 * Parses function arguments. 275 * @param p The parser. 276 * @param flags Flags restricting what kind of expressions the arguments can 277 * be. 278 */ 279 static void bc_parse_args(BcParse *p, uint8_t flags) { 280 281 bool comma = false; 282 size_t nargs; 283 284 bc_lex_next(&p->l); 285 286 // Print and comparison operators not allowed. Well, comparison operators 287 // only for POSIX. But we do allow arrays, and we *must* get a value. 288 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 289 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL); 290 291 // Count the arguments and parse them. 292 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) { 293 294 bc_parse_expr_status(p, flags, bc_parse_next_arg); 295 296 comma = (p->l.t == BC_LEX_COMMA); 297 if (comma) bc_lex_next(&p->l); 298 } 299 300 // An ending comma is FAIL. 301 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 302 303 // Now do the call with the number of arguments. 304 bc_parse_push(p, BC_INST_CALL); 305 bc_parse_pushIndex(p, nargs); 306 } 307 308 /** 309 * Parses a function call. 310 * @param p The parser. 311 * @param flags Flags restricting what kind of expressions the arguments can 312 * be. 313 */ 314 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) { 315 316 size_t idx; 317 318 bc_parse_args(p, flags); 319 320 // We just assert this because bc_parse_args() should 321 // ensure that the next token is what it should be. 322 assert(p->l.t == BC_LEX_RPAREN); 323 324 // We cannot use bc_program_insertFunc() here 325 // because it will overwrite an existing function. 326 idx = bc_map_index(&p->prog->fn_map, name); 327 328 // The function does not exist yet. Create a space for it. If the user does 329 // not define it, it's a *runtime* error, not a parse error. 330 if (idx == BC_VEC_INVALID_IDX) { 331 332 BC_SIG_LOCK; 333 334 idx = bc_program_insertFunc(p->prog, name); 335 336 BC_SIG_UNLOCK; 337 338 assert(idx != BC_VEC_INVALID_IDX); 339 340 // Make sure that this pointer was not invalidated. 341 p->func = bc_vec_item(&p->prog->fns, p->fidx); 342 } 343 // The function exists, so set the right function index. 344 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx; 345 346 bc_parse_pushIndex(p, idx); 347 348 // Make sure to get the next token. 349 bc_lex_next(&p->l); 350 } 351 352 /** 353 * Parses a name/identifier-based expression. It could be a variable, an array 354 * element, an array itself (for function arguments), a function call, etc. 355 * 356 */ 357 static void bc_parse_name(BcParse *p, BcInst *type, 358 bool *can_assign, uint8_t flags) 359 { 360 char *name; 361 362 BC_SIG_LOCK; 363 364 // We want a copy of the name since the lexer might overwrite its copy. 365 name = bc_vm_strdup(p->l.str.v); 366 367 BC_SETJMP_LOCKED(err); 368 369 BC_SIG_UNLOCK; 370 371 // We need the next token to see if it's just a variable or something more. 372 bc_lex_next(&p->l); 373 374 // Array element or array. 375 if (p->l.t == BC_LEX_LBRACKET) { 376 377 bc_lex_next(&p->l); 378 379 // Array only. This has to be a function parameter. 380 if (p->l.t == BC_LEX_RBRACKET) { 381 382 // Error if arrays are not allowed. 383 if (BC_ERR(!(flags & BC_PARSE_ARRAY))) 384 bc_parse_err(p, BC_ERR_PARSE_EXPR); 385 386 *type = BC_INST_ARRAY; 387 *can_assign = false; 388 } 389 else { 390 391 // If we are here, we have an array element. We need to set the 392 // expression parsing flags. 393 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) | 394 BC_PARSE_NEEDVAL; 395 396 bc_parse_expr_status(p, flags2, bc_parse_next_elem); 397 398 // The next token *must* be a right bracket. 399 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 400 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 401 402 *type = BC_INST_ARRAY_ELEM; 403 *can_assign = true; 404 } 405 406 // Make sure to get the next token. 407 bc_lex_next(&p->l); 408 409 // Push the instruction and the name of the identifier. 410 bc_parse_push(p, *type); 411 bc_parse_pushName(p, name, false); 412 } 413 else if (p->l.t == BC_LEX_LPAREN) { 414 415 // We are parsing a function call; error if not allowed. 416 if (BC_ERR(flags & BC_PARSE_NOCALL)) 417 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 418 419 *type = BC_INST_CALL; 420 *can_assign = false; 421 422 bc_parse_call(p, name, flags); 423 } 424 else { 425 // Just a variable. 426 *type = BC_INST_VAR; 427 *can_assign = true; 428 bc_parse_push(p, BC_INST_VAR); 429 bc_parse_pushName(p, name, true); 430 } 431 432 err: 433 // Need to make sure to unallocate the name. 434 BC_SIG_MAYLOCK; 435 free(name); 436 BC_LONGJMP_CONT; 437 } 438 439 /** 440 * Parses a builtin function that takes no arguments. This includes read(), 441 * rand(), maxibase(), maxobase(), maxscale(), and maxrand(). 442 * @param p The parser. 443 * @param inst The instruction corresponding to the builtin. 444 */ 445 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) { 446 447 // Must have a left paren. 448 bc_lex_next(&p->l); 449 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 450 451 // Must have a right paren. 452 bc_lex_next(&p->l); 453 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 454 455 bc_parse_push(p, inst); 456 457 bc_lex_next(&p->l); 458 } 459 460 /** 461 * Parses a builtin function that takes 1 argument. This includes length(), 462 * sqrt(), abs(), scale(), and irand(). 463 * @param p The parser. 464 * @param type The lex token. 465 * @param flags The expression parsing flags for parsing the argument. 466 * @param prev An out parameter; the previous instruction pointer. 467 */ 468 static void bc_parse_builtin(BcParse *p, BcLexType type, 469 uint8_t flags, BcInst *prev) 470 { 471 // Must have a left paren. 472 bc_lex_next(&p->l); 473 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 474 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 475 476 bc_lex_next(&p->l); 477 478 // Change the flags as needed for parsing the argument. 479 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 480 flags |= BC_PARSE_NEEDVAL; 481 482 // Since length can take arrays, we need to specially add that flag. 483 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY; 484 485 bc_parse_expr_status(p, flags, bc_parse_next_rel); 486 487 // Must have a right paren. 488 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 489 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 490 491 // Adjust previous based on the token and push it. 492 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH; 493 bc_parse_push(p, *prev); 494 495 bc_lex_next(&p->l); 496 } 497 498 /** 499 * Parses a builtin function that takes 3 arguments. This includes modexp() and 500 * divmod(). 501 */ 502 static void bc_parse_builtin3(BcParse *p, BcLexType type, 503 uint8_t flags, BcInst *prev) 504 { 505 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD); 506 507 // Must have a left paren. 508 bc_lex_next(&p->l); 509 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 510 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 511 512 bc_lex_next(&p->l); 513 514 // Change the flags as needed for parsing the argument. 515 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 516 flags |= BC_PARSE_NEEDVAL; 517 518 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 519 520 // Must have a comma. 521 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 522 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 523 524 bc_lex_next(&p->l); 525 526 bc_parse_expr_status(p, flags, bc_parse_next_builtin); 527 528 // Must have a comma. 529 if (BC_ERR(p->l.t != BC_LEX_COMMA)) 530 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 531 532 bc_lex_next(&p->l); 533 534 // If it is a divmod, parse an array name. Otherwise, just parse another 535 // expression. 536 if (type == BC_LEX_KW_DIVMOD) { 537 538 // Must have a name. 539 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 540 541 // This is safe because the next token should not overwrite the name. 542 bc_lex_next(&p->l); 543 544 // Must have a left bracket. 545 if (BC_ERR(p->l.t != BC_LEX_LBRACKET)) 546 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 547 548 // This is safe because the next token should not overwrite the name. 549 bc_lex_next(&p->l); 550 551 // Must have a right bracket. 552 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 553 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 554 555 // This is safe because the next token should not overwrite the name. 556 bc_lex_next(&p->l); 557 } 558 else bc_parse_expr_status(p, flags, bc_parse_next_rel); 559 560 // Must have a right paren. 561 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 562 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 563 564 // Adjust previous based on the token and push it. 565 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP; 566 bc_parse_push(p, *prev); 567 568 // If we have divmod, we need to assign the modulus to the array element, so 569 // we need to push the instructions for doing so. 570 if (type == BC_LEX_KW_DIVMOD) { 571 572 // The zeroth element. 573 bc_parse_push(p, BC_INST_ZERO); 574 bc_parse_push(p, BC_INST_ARRAY_ELEM); 575 576 // Push the array. 577 bc_parse_pushName(p, p->l.str.v, false); 578 579 // Swap them and assign. After this, the top item on the stack should 580 // be the quotient. 581 bc_parse_push(p, BC_INST_SWAP); 582 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL); 583 } 584 585 bc_lex_next(&p->l); 586 } 587 588 /** 589 * Parses the scale keyword. This is special because scale can be a value or a 590 * builtin function. 591 * @param p The parser. 592 * @param type An out parameter; the instruction for the parse. 593 * @param can_assign An out parameter; whether the expression can be assigned 594 * to. 595 * @param flags The expression parsing flags for parsing a scale() arg. 596 */ 597 static void bc_parse_scale(BcParse *p, BcInst *type, 598 bool *can_assign, uint8_t flags) 599 { 600 bc_lex_next(&p->l); 601 602 // Without the left paren, it's just the keyword. 603 if (p->l.t != BC_LEX_LPAREN) { 604 605 // Set, push, and return. 606 *type = BC_INST_SCALE; 607 *can_assign = true; 608 bc_parse_push(p, BC_INST_SCALE); 609 return; 610 } 611 612 // Handle the scale function. 613 *type = BC_INST_SCALE_FUNC; 614 *can_assign = false; 615 616 // Once again, adjust the flags. 617 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL); 618 flags |= BC_PARSE_NEEDVAL; 619 620 bc_lex_next(&p->l); 621 622 bc_parse_expr_status(p, flags, bc_parse_next_rel); 623 624 // Must have a right paren. 625 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 626 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 627 628 bc_parse_push(p, BC_INST_SCALE_FUNC); 629 630 bc_lex_next(&p->l); 631 } 632 633 /** 634 * Parses and increment or decrement operator. This is a bit complex. 635 * @param p The parser. 636 * @param prev An out parameter; the previous instruction pointer. 637 * @param can_assign An out parameter; whether the expression can be assigned 638 * to. 639 * @param nexs An in/out parameter; the number of expressions in the 640 * parse tree that are not used. 641 * @param flags The expression parsing flags for parsing a scale() arg. 642 */ 643 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign, 644 size_t *nexs, uint8_t flags) 645 { 646 BcLexType type; 647 uchar inst; 648 BcInst etype = *prev; 649 BcLexType last = p->l.last; 650 651 assert(prev != NULL && can_assign != NULL); 652 653 // If we can't assign to the previous token, then we have an error. 654 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC || 655 last == BC_LEX_RPAREN)) 656 { 657 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 658 } 659 660 // Is the previous instruction for a variable? 661 if (BC_PARSE_INST_VAR(etype)) { 662 663 // If so, this is a postfix operator. 664 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 665 666 // Only postfix uses BC_INST_INC and BC_INST_DEC. 667 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC); 668 bc_parse_push(p, inst); 669 bc_lex_next(&p->l); 670 *can_assign = false; 671 } 672 else { 673 674 // This is a prefix operator. In that case, we just convert it to 675 // an assignment instruction. 676 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC); 677 678 bc_lex_next(&p->l); 679 type = p->l.t; 680 681 // Because we parse the next part of the expression 682 // right here, we need to increment this. 683 *nexs = *nexs + 1; 684 685 // Is the next token a normal identifier? 686 if (type == BC_LEX_NAME) { 687 688 // Parse the name. 689 uint8_t flags2 = flags & ~BC_PARSE_ARRAY; 690 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL); 691 } 692 // Is the next token a global? 693 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) { 694 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST); 695 bc_lex_next(&p->l); 696 } 697 // Is the next token specifically scale, which needs special treatment? 698 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) { 699 700 bc_lex_next(&p->l); 701 702 // Check that scale() was not used. 703 if (BC_ERR(p->l.t == BC_LEX_LPAREN)) 704 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 705 else bc_parse_push(p, BC_INST_SCALE); 706 } 707 // Now we know we have an error. 708 else bc_parse_err(p, BC_ERR_PARSE_TOKEN); 709 710 *can_assign = false; 711 712 bc_parse_push(p, BC_INST_ONE); 713 bc_parse_push(p, inst); 714 } 715 } 716 717 /** 718 * Parses the minus operator. This needs special treatment because it is either 719 * subtract or negation. 720 * @param p The parser. 721 * @param prev An in/out parameter; the previous instruction. 722 * @param ops_bgn The size of the operator stack. 723 * @param rparen True if the last token was a right paren. 724 * @param binlast True if the last token was a binary operator. 725 * @param nexprs An in/out parameter; the number of unused expressions. 726 */ 727 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn, 728 bool rparen, bool binlast, size_t *nexprs) 729 { 730 BcLexType type; 731 732 bc_lex_next(&p->l); 733 734 // Figure out if it's a minus or a negation. 735 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG; 736 *prev = BC_PARSE_TOKEN_INST(type); 737 738 // We can just push onto the op stack because this is the largest 739 // precedence operator that gets pushed. Inc/dec does not. 740 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type); 741 else bc_parse_operator(p, type, ops_bgn, nexprs); 742 } 743 744 /** 745 * Parses a string. 746 * @param p The parser. 747 * @param inst The instruction corresponding to how the string was found and 748 * how it should be printed. 749 */ 750 static void bc_parse_str(BcParse *p, BcInst inst) { 751 bc_parse_addString(p); 752 bc_parse_push(p, inst); 753 bc_lex_next(&p->l); 754 } 755 756 /** 757 * Parses a print statement. 758 * @param p The parser. 759 */ 760 static void bc_parse_print(BcParse *p, BcLexType type) { 761 762 BcLexType t; 763 bool comma = false; 764 BcInst inst = type == BC_LEX_KW_STREAM ? 765 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP; 766 767 bc_lex_next(&p->l); 768 769 t = p->l.t; 770 771 // A print or stream statement has to have *something*. 772 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT); 773 774 do { 775 776 // If the token is a string, then print it with escapes. 777 // BC_INST_PRINT_POP plays that role for bc. 778 if (t == BC_LEX_STR) bc_parse_str(p, inst); 779 else { 780 // We have an actual number; parse and add a print instruction. 781 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print); 782 bc_parse_push(p, inst); 783 } 784 785 // Is the next token a comma? 786 comma = (p->l.t == BC_LEX_COMMA); 787 788 // Get the next token if we have a comma. 789 if (comma) bc_lex_next(&p->l); 790 else { 791 792 // If we don't have a comma, the statement needs to end. 793 if (!bc_parse_isDelimiter(p)) 794 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 795 else break; 796 } 797 798 t = p->l.t; 799 800 } while (true); 801 802 // If we have a comma but no token, that's bad. 803 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 804 } 805 806 /** 807 * Parses a return statement. 808 * @param p The parser. 809 */ 810 static void bc_parse_return(BcParse *p) { 811 812 BcLexType t; 813 bool paren; 814 uchar inst = BC_INST_RET0; 815 816 // If we are not in a function, that's an error. 817 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 818 819 // If we are in a void function, make sure to return void. 820 if (p->func->voidfn) inst = BC_INST_RET_VOID; 821 822 bc_lex_next(&p->l); 823 824 t = p->l.t; 825 paren = (t == BC_LEX_LPAREN); 826 827 // An empty return statement just needs to push the selected instruction. 828 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst); 829 else { 830 831 BcParseStatus s; 832 833 // Need to parse the expression whose value will be returned. 834 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr); 835 836 // If the expression was empty, just push the selected instruction. 837 if (s == BC_PARSE_STATUS_EMPTY_EXPR) { 838 bc_parse_push(p, inst); 839 bc_lex_next(&p->l); 840 } 841 842 // POSIX requires parentheses. 843 if (!paren || p->l.last != BC_LEX_RPAREN) { 844 bc_parse_err(p, BC_ERR_POSIX_RET); 845 } 846 847 // Void functions require an empty expression. 848 if (BC_ERR(p->func->voidfn)) { 849 if (s != BC_PARSE_STATUS_EMPTY_EXPR) 850 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name); 851 } 852 // If we got here, we want to be sure to end the function with a real 853 // return instruction, just in case. 854 else bc_parse_push(p, BC_INST_RET); 855 } 856 } 857 858 /** 859 * Clears flags that indicate the end of an if statement and its block and sets 860 * the jump location. 861 * @param p The parser. 862 */ 863 static void bc_parse_noElse(BcParse *p) { 864 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 865 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END)); 866 bc_parse_setLabel(p); 867 } 868 869 /** 870 * Ends (finishes parsing) the body of a control statement or a function. 871 * @param p The parser. 872 * @param brace True if the body was ended by a brace, false otherwise. 873 */ 874 static void bc_parse_endBody(BcParse *p, bool brace) { 875 876 bool has_brace, new_else = false; 877 878 // We cannot be ending a body if there are no bodies to end. 879 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 880 881 if (brace) { 882 883 // The brace was already gotten; make sure that the caller did not lie. 884 // We check for the requirement of braces later. 885 assert(p->l.t == BC_LEX_RBRACE); 886 887 bc_lex_next(&p->l); 888 889 // If the next token is not a delimiter, that is a problem. 890 if (BC_ERR(!bc_parse_isDelimiter(p))) 891 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 892 } 893 894 // Do we have a brace flag? 895 has_brace = (BC_PARSE_BRACE(p) != 0); 896 897 do { 898 size_t len = p->flags.len; 899 bool loop; 900 901 // If we have a brace flag but not a brace, that's a problem. 902 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 903 904 // Are we inside a loop? 905 loop = (BC_PARSE_LOOP_INNER(p) != 0); 906 907 // If we are ending a loop or an else... 908 if (loop || BC_PARSE_ELSE(p)) { 909 910 // Loops have condition labels that we have to take care of as well. 911 if (loop) { 912 913 size_t *label = bc_vec_top(&p->conds); 914 915 bc_parse_push(p, BC_INST_JUMP); 916 bc_parse_pushIndex(p, *label); 917 918 bc_vec_pop(&p->conds); 919 } 920 921 bc_parse_setLabel(p); 922 bc_vec_pop(&p->flags); 923 } 924 // If we are ending a function... 925 else if (BC_PARSE_FUNC_INNER(p)) { 926 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0); 927 bc_parse_push(p, inst); 928 bc_parse_updateFunc(p, BC_PROG_MAIN); 929 bc_vec_pop(&p->flags); 930 } 931 // If we have a brace flag and not an if statement, we can pop the top 932 // of the flags stack because they have been taken care of above. 933 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags); 934 935 // This needs to be last to parse nested if's properly. 936 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) { 937 938 // Eat newlines. 939 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 940 941 // *Now* we can pop the flags. 942 bc_vec_pop(&p->flags); 943 944 // If we are allowed non-POSIX stuff... 945 if (!BC_S) { 946 947 // Have we found yet another dangling else? 948 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END; 949 new_else = (p->l.t == BC_LEX_KW_ELSE); 950 951 // Parse the else or end the if statement body. 952 if (new_else) bc_parse_else(p); 953 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace)) 954 bc_parse_noElse(p); 955 } 956 // POSIX requires us to do the bare minimum only. 957 else bc_parse_noElse(p); 958 } 959 960 // If these are both true, we have "used" the braces that we found. 961 if (brace && has_brace) brace = false; 962 963 // This condition was perhaps the hardest single part of the parser. If the 964 // flags stack does not have enough, we should stop. If we have a new else 965 // statement, we should stop. If we do have the end of an if statement and 966 // we have eaten the brace, we should stop. If we do have a brace flag, we 967 // should stop. 968 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) && 969 !(has_brace = (BC_PARSE_BRACE(p) != 0))); 970 971 // If we have a brace, yet no body for it, that's a problem. 972 if (BC_ERR(p->flags.len == 1 && brace)) 973 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 974 else if (brace && BC_PARSE_BRACE(p)) { 975 976 // If we make it here, we have a brace and a flag for it. 977 uint16_t flags = BC_PARSE_TOP_FLAG(p); 978 979 // This condition ensure that the *last* body is correctly finished by 980 // popping its flags. 981 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) && 982 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) && 983 !(flags & (BC_PARSE_FLAG_IF_END))) 984 { 985 bc_vec_pop(&p->flags); 986 } 987 } 988 } 989 990 /** 991 * Starts the body of a control statement or function. 992 * @param p The parser. 993 * @param flags The current flags (will be edited). 994 */ 995 static void bc_parse_startBody(BcParse *p, uint16_t flags) { 996 assert(flags); 997 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP)); 998 flags |= BC_PARSE_FLAG_BODY; 999 bc_vec_push(&p->flags, &flags); 1000 } 1001 1002 /** 1003 * Parses an if statement. 1004 * @param p The parser. 1005 */ 1006 static void bc_parse_if(BcParse *p) { 1007 1008 // We are allowed relational operators, and we must have a value. 1009 size_t idx; 1010 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1011 1012 // Get the left paren and barf if necessary. 1013 bc_lex_next(&p->l); 1014 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1015 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1016 1017 // Parse the condition. 1018 bc_lex_next(&p->l); 1019 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1020 1021 // Must have a right paren. 1022 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1023 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1024 1025 bc_lex_next(&p->l); 1026 1027 // Insert the conditional jump instruction. 1028 bc_parse_push(p, BC_INST_JUMP_ZERO); 1029 1030 idx = p->func->labels.len; 1031 1032 // Push the index for the instruction and create an exit label for an else 1033 // statement. 1034 bc_parse_pushIndex(p, idx); 1035 bc_parse_createExitLabel(p, idx, false); 1036 1037 bc_parse_startBody(p, BC_PARSE_FLAG_IF); 1038 } 1039 1040 /** 1041 * Parses an else statement. 1042 * @param p The parser. 1043 */ 1044 static void bc_parse_else(BcParse *p) { 1045 1046 size_t idx = p->func->labels.len; 1047 1048 // We must be at the end of an if statement. 1049 if (BC_ERR(!BC_PARSE_IF_END(p))) 1050 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1051 1052 // Push an unconditional jump to make bc jump over the else statement if it 1053 // executed the original if statement. 1054 bc_parse_push(p, BC_INST_JUMP); 1055 bc_parse_pushIndex(p, idx); 1056 1057 // Clear the else stuff. Yes, that function is misnamed for its use here, 1058 // but deal with it. 1059 bc_parse_noElse(p); 1060 1061 // Create the exit label and parse the body. 1062 bc_parse_createExitLabel(p, idx, false); 1063 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE); 1064 1065 bc_lex_next(&p->l); 1066 } 1067 1068 /** 1069 * Parse a while loop. 1070 * @param p The parser. 1071 */ 1072 static void bc_parse_while(BcParse *p) { 1073 1074 // We are allowed relational operators, and we must have a value. 1075 size_t idx; 1076 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1077 1078 // Get the left paren and barf if necessary. 1079 bc_lex_next(&p->l); 1080 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1081 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1082 bc_lex_next(&p->l); 1083 1084 // Create the labels. Loops need both. 1085 bc_parse_createCondLabel(p, p->func->labels.len); 1086 idx = p->func->labels.len; 1087 bc_parse_createExitLabel(p, idx, true); 1088 1089 // Parse the actual condition and barf on non-right paren. 1090 bc_parse_expr_status(p, flags, bc_parse_next_rel); 1091 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1092 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1093 bc_lex_next(&p->l); 1094 1095 // Now we can push the conditional jump and start the body. 1096 bc_parse_push(p, BC_INST_JUMP_ZERO); 1097 bc_parse_pushIndex(p, idx); 1098 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1099 } 1100 1101 /** 1102 * Parse a for loop. 1103 * @param p The parser. 1104 */ 1105 static void bc_parse_for(BcParse *p) { 1106 1107 size_t cond_idx, exit_idx, body_idx, update_idx; 1108 1109 // Barf on the missing left paren. 1110 bc_lex_next(&p->l); 1111 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1112 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1113 bc_lex_next(&p->l); 1114 1115 // The first statement can be empty, but if it is, check for error in POSIX 1116 // mode. Otherwise, parse it. 1117 if (p->l.t != BC_LEX_SCOLON) 1118 bc_parse_expr_status(p, 0, bc_parse_next_for); 1119 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1120 1121 // Must have a semicolon. 1122 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1123 bc_lex_next(&p->l); 1124 1125 // These are indices for labels. There are so many of them because the end 1126 // of the loop must unconditionally jump to the update code. Then the update 1127 // code must unconditionally jump to the condition code. Then the condition 1128 // code must *conditionally* jump to the exit. 1129 cond_idx = p->func->labels.len; 1130 update_idx = cond_idx + 1; 1131 body_idx = update_idx + 1; 1132 exit_idx = body_idx + 1; 1133 1134 // This creates the condition label. 1135 bc_parse_createLabel(p, p->func->code.len); 1136 1137 // Parse an expression if it exists. 1138 if (p->l.t != BC_LEX_SCOLON) { 1139 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL); 1140 bc_parse_expr_status(p, flags, bc_parse_next_for); 1141 } 1142 else { 1143 1144 // Set this for the next call to bc_parse_number because an empty 1145 // condition means that it is an infinite loop, so the condition must be 1146 // non-zero. This is safe to set because the current token is a 1147 // semicolon, which has no string requirement. 1148 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one); 1149 bc_parse_number(p); 1150 1151 // An empty condition makes POSIX mad. 1152 bc_parse_err(p, BC_ERR_POSIX_FOR); 1153 } 1154 1155 // Must have a semicolon. 1156 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) 1157 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1158 bc_lex_next(&p->l); 1159 1160 // Now we can set up the conditional jump to the exit and an unconditional 1161 // jump to the body right after. The unconditional jump to the body is 1162 // because there is update code coming right after the condition, so we need 1163 // to skip it to get to the body. 1164 bc_parse_push(p, BC_INST_JUMP_ZERO); 1165 bc_parse_pushIndex(p, exit_idx); 1166 bc_parse_push(p, BC_INST_JUMP); 1167 bc_parse_pushIndex(p, body_idx); 1168 1169 // Now create the label for the update code. 1170 bc_parse_createCondLabel(p, update_idx); 1171 1172 // Parse if not empty, and if it is, let POSIX yell if necessary. 1173 if (p->l.t != BC_LEX_RPAREN) 1174 bc_parse_expr_status(p, 0, bc_parse_next_rel); 1175 else bc_parse_err(p, BC_ERR_POSIX_FOR); 1176 1177 // Must have a right paren. 1178 if (BC_ERR(p->l.t != BC_LEX_RPAREN)) 1179 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1180 1181 // Set up a jump to the condition right after the update code. 1182 bc_parse_push(p, BC_INST_JUMP); 1183 bc_parse_pushIndex(p, cond_idx); 1184 bc_parse_createLabel(p, p->func->code.len); 1185 1186 // Create an exit label for the body and start the body. 1187 bc_parse_createExitLabel(p, exit_idx, true); 1188 bc_lex_next(&p->l); 1189 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER); 1190 } 1191 1192 /** 1193 * Parse a statement or token that indicates a loop exit. This includes an 1194 * actual loop exit, the break keyword, or the continue keyword. 1195 * @param p The parser. 1196 * @param type The type of exit. 1197 */ 1198 static void bc_parse_loopExit(BcParse *p, BcLexType type) { 1199 1200 size_t i; 1201 BcInstPtr *ip; 1202 1203 // Must have a loop. If we don't, that's an error. 1204 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1205 1206 // If we have a break statement... 1207 if (type == BC_LEX_KW_BREAK) { 1208 1209 // If there are no exits, something went wrong somewhere. 1210 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1211 1212 // Get the exit. 1213 i = p->exits.len - 1; 1214 ip = bc_vec_item(&p->exits, i); 1215 1216 // The condition !ip->func is true if the exit is not for a loop, so we 1217 // need to find the first actual loop exit. 1218 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--); 1219 1220 // Make sure everything is hunky dory. 1221 assert(ip != NULL && (i < p->exits.len || ip->func)); 1222 1223 // Set the index for the exit. 1224 i = ip->idx; 1225 } 1226 // If we have a continue statement or just the loop end, jump to the 1227 // condition (or update for a foor loop). 1228 else i = *((size_t*) bc_vec_top(&p->conds)); 1229 1230 // Add the unconditional jump. 1231 bc_parse_push(p, BC_INST_JUMP); 1232 bc_parse_pushIndex(p, i); 1233 1234 bc_lex_next(&p->l); 1235 } 1236 1237 /** 1238 * Parse a function (header). 1239 * @param p The parser. 1240 */ 1241 static void bc_parse_func(BcParse *p) { 1242 1243 bool comma = false, voidfn; 1244 uint16_t flags; 1245 size_t idx; 1246 1247 bc_lex_next(&p->l); 1248 1249 // Must have a name. 1250 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1251 1252 // If the name is "void", and POSIX is not on, mark as void. 1253 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME && 1254 !strcmp(p->l.str.v, "void")); 1255 1256 // We can safely do this because the expected token should not overwrite the 1257 // function name. 1258 bc_lex_next(&p->l); 1259 1260 // If we *don't* have another name, then void is the name of the function. 1261 voidfn = (voidfn && p->l.t == BC_LEX_NAME); 1262 1263 // With a void function, allow POSIX to complain and get a new token. 1264 if (voidfn) { 1265 1266 bc_parse_err(p, BC_ERR_POSIX_VOID); 1267 1268 // We can safely do this because the expected token should not overwrite 1269 // the function name. 1270 bc_lex_next(&p->l); 1271 } 1272 1273 // Must have a left paren. 1274 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) 1275 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1276 1277 // Make sure the functions map and vector are synchronized. 1278 assert(p->prog->fns.len == p->prog->fn_map.len); 1279 1280 // Must lock signals because vectors are changed, and the vector functions 1281 // expect signals to be locked. 1282 BC_SIG_LOCK; 1283 1284 // Insert the function by name into the map and vector. 1285 idx = bc_program_insertFunc(p->prog, p->l.str.v); 1286 1287 BC_SIG_UNLOCK; 1288 1289 // Make sure the insert worked. 1290 assert(idx); 1291 1292 // Update the function pointer and stuff in the parser and set its void. 1293 bc_parse_updateFunc(p, idx); 1294 p->func->voidfn = voidfn; 1295 1296 bc_lex_next(&p->l); 1297 1298 // While we do not have a right paren, we are still parsing arguments. 1299 while (p->l.t != BC_LEX_RPAREN) { 1300 1301 BcType t = BC_TYPE_VAR; 1302 1303 // If we have an asterisk, we are parsing a reference argument. 1304 if (p->l.t == BC_LEX_OP_MULTIPLY) { 1305 1306 t = BC_TYPE_REF; 1307 bc_lex_next(&p->l); 1308 1309 // Let POSIX complain if necessary. 1310 bc_parse_err(p, BC_ERR_POSIX_REF); 1311 } 1312 1313 // If we don't have a name, the argument will not have a name. Barf. 1314 if (BC_ERR(p->l.t != BC_LEX_NAME)) 1315 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1316 1317 // Increment the number of parameters. 1318 p->func->nparams += 1; 1319 1320 // Copy the string in the lexer so that we can use the lexer again. 1321 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v); 1322 1323 bc_lex_next(&p->l); 1324 1325 // We are parsing an array parameter if this is true. 1326 if (p->l.t == BC_LEX_LBRACKET) { 1327 1328 // Set the array type, unless we are already parsing a reference. 1329 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY; 1330 1331 bc_lex_next(&p->l); 1332 1333 // The brackets *must* be empty. 1334 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1335 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1336 1337 bc_lex_next(&p->l); 1338 } 1339 // If we did *not* get a bracket, but we are expecting a reference, we 1340 // have a problem. 1341 else if (BC_ERR(t == BC_TYPE_REF)) 1342 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v); 1343 1344 // Test for comma and get the next token if it exists. 1345 comma = (p->l.t == BC_LEX_COMMA); 1346 if (comma) bc_lex_next(&p->l); 1347 1348 // Insert the parameter into the function. 1349 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1350 } 1351 1352 // If we have a comma, but no parameter, barf. 1353 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1354 1355 // Start the body. 1356 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER; 1357 bc_parse_startBody(p, flags); 1358 1359 bc_lex_next(&p->l); 1360 1361 // POSIX requires that a brace be on the same line as the function header. 1362 // If we don't have a brace, let POSIX throw an error. 1363 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE); 1364 } 1365 1366 /** 1367 * Parse an auto list. 1368 * @param p The parser. 1369 */ 1370 static void bc_parse_auto(BcParse *p) { 1371 1372 bool comma, one; 1373 1374 // Error if the auto keyword appeared in the wrong place. 1375 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1376 bc_lex_next(&p->l); 1377 1378 p->auto_part = comma = false; 1379 1380 // We need at least one variable or array. 1381 one = (p->l.t == BC_LEX_NAME); 1382 1383 // While we have a variable or array. 1384 while (p->l.t == BC_LEX_NAME) { 1385 1386 BcType t; 1387 1388 // Copy the name from the lexer, so we can use it again. 1389 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v); 1390 1391 bc_lex_next(&p->l); 1392 1393 // If we are parsing an array... 1394 if (p->l.t == BC_LEX_LBRACKET) { 1395 1396 t = BC_TYPE_ARRAY; 1397 1398 bc_lex_next(&p->l); 1399 1400 // The brackets *must* be empty. 1401 if (BC_ERR(p->l.t != BC_LEX_RBRACKET)) 1402 bc_parse_err(p, BC_ERR_PARSE_FUNC); 1403 1404 bc_lex_next(&p->l); 1405 } 1406 else t = BC_TYPE_VAR; 1407 1408 // Test for comma and get the next token if it exists. 1409 comma = (p->l.t == BC_LEX_COMMA); 1410 if (comma) bc_lex_next(&p->l); 1411 1412 // Insert the auto into the function. 1413 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line); 1414 } 1415 1416 // If we have a comma, but no auto, barf. 1417 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC); 1418 1419 // If we don't have any variables or arrays, barf. 1420 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO); 1421 1422 // The auto statement should be all that's in the statement. 1423 if (BC_ERR(!bc_parse_isDelimiter(p))) 1424 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1425 } 1426 1427 /** 1428 * Parses a body. 1429 * @param p The parser. 1430 * @param brace True if a brace was encountered, false otherwise. 1431 */ 1432 static void bc_parse_body(BcParse *p, bool brace) { 1433 1434 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p); 1435 1436 assert(flag_ptr != NULL); 1437 assert(p->flags.len >= 2); 1438 1439 // The body flag is for when we expect a body. We got a body, so clear the 1440 // flag. 1441 *flag_ptr &= ~(BC_PARSE_FLAG_BODY); 1442 1443 // If we are inside a function, that means we just barely entered it, and 1444 // we can expect an auto list. 1445 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) { 1446 1447 // We *must* have a brace in this case. 1448 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1449 1450 p->auto_part = (p->l.t != BC_LEX_KW_AUTO); 1451 1452 if (!p->auto_part) { 1453 1454 // Make sure this is true to not get a parse error. 1455 p->auto_part = true; 1456 1457 // Since we already have the auto keyword, parse. 1458 bc_parse_auto(p); 1459 } 1460 1461 // Eat a newline. 1462 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 1463 } 1464 else { 1465 1466 // This is the easy part. 1467 size_t len = p->flags.len; 1468 1469 assert(*flag_ptr); 1470 1471 // Parse a statement. 1472 bc_parse_stmt(p); 1473 1474 // This is a very important condition to get right. If there is no 1475 // brace, and no body flag, and the flags len hasn't shrunk, then we 1476 // have a body that was not delimited by braces, so we need to end it 1477 // now, after just one statement. 1478 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len) 1479 bc_parse_endBody(p, false); 1480 } 1481 } 1482 1483 /** 1484 * Parses a statement. This is the entry point for just about everything, except 1485 * function definitions. 1486 * @param p The parser. 1487 */ 1488 static void bc_parse_stmt(BcParse *p) { 1489 1490 size_t len; 1491 uint16_t flags; 1492 BcLexType type = p->l.t; 1493 1494 // Eat newline. 1495 if (type == BC_LEX_NLINE) { 1496 bc_lex_next(&p->l); 1497 return; 1498 } 1499 1500 // Eat auto list. 1501 if (type == BC_LEX_KW_AUTO) { 1502 bc_parse_auto(p); 1503 return; 1504 } 1505 1506 // If we reach this point, no auto list is allowed. 1507 p->auto_part = false; 1508 1509 // Everything but an else needs to be taken care of here, but else is 1510 // special. 1511 if (type != BC_LEX_KW_ELSE) { 1512 1513 // After an if, no else found. 1514 if (BC_PARSE_IF_END(p)) { 1515 1516 // Clear the expectation for else, end body, and return. Returning 1517 // gives us a clean slate for parsing again. 1518 bc_parse_noElse(p); 1519 if (p->flags.len > 1 && !BC_PARSE_BRACE(p)) 1520 bc_parse_endBody(p, false); 1521 return; 1522 } 1523 // With a left brace, we are parsing a body. 1524 else if (type == BC_LEX_LBRACE) { 1525 1526 // We need to start a body if we are not expecting one yet. 1527 if (!BC_PARSE_BODY(p)) { 1528 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE); 1529 bc_lex_next(&p->l); 1530 } 1531 // If we *are* expecting a body, that body should get a brace. This 1532 // takes care of braces being on a different line than if and loop 1533 // headers. 1534 else { 1535 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE; 1536 bc_lex_next(&p->l); 1537 bc_parse_body(p, true); 1538 } 1539 1540 // If we have reached this point, we need to return for a clean 1541 // slate. 1542 return; 1543 } 1544 // This happens when we are expecting a body and get a single statement, 1545 // i.e., a body with no braces surrounding it. Returns after for a clean 1546 // slate. 1547 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) { 1548 bc_parse_body(p, false); 1549 return; 1550 } 1551 } 1552 1553 len = p->flags.len; 1554 flags = BC_PARSE_TOP_FLAG(p); 1555 1556 switch (type) { 1557 1558 // All of these are valid for expressions. 1559 case BC_LEX_OP_INC: 1560 case BC_LEX_OP_DEC: 1561 case BC_LEX_OP_MINUS: 1562 case BC_LEX_OP_BOOL_NOT: 1563 case BC_LEX_LPAREN: 1564 case BC_LEX_NAME: 1565 case BC_LEX_NUMBER: 1566 case BC_LEX_KW_IBASE: 1567 case BC_LEX_KW_LAST: 1568 case BC_LEX_KW_LENGTH: 1569 case BC_LEX_KW_OBASE: 1570 case BC_LEX_KW_SCALE: 1571 #if BC_ENABLE_EXTRA_MATH 1572 case BC_LEX_KW_SEED: 1573 #endif // BC_ENABLE_EXTRA_MATH 1574 case BC_LEX_KW_SQRT: 1575 case BC_LEX_KW_ABS: 1576 #if BC_ENABLE_EXTRA_MATH 1577 case BC_LEX_KW_IRAND: 1578 #endif // BC_ENABLE_EXTRA_MATH 1579 case BC_LEX_KW_ASCIIFY: 1580 case BC_LEX_KW_MODEXP: 1581 case BC_LEX_KW_DIVMOD: 1582 case BC_LEX_KW_READ: 1583 #if BC_ENABLE_EXTRA_MATH 1584 case BC_LEX_KW_RAND: 1585 #endif // BC_ENABLE_EXTRA_MATH 1586 case BC_LEX_KW_MAXIBASE: 1587 case BC_LEX_KW_MAXOBASE: 1588 case BC_LEX_KW_MAXSCALE: 1589 #if BC_ENABLE_EXTRA_MATH 1590 case BC_LEX_KW_MAXRAND: 1591 #endif // BC_ENABLE_EXTRA_MATH 1592 { 1593 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr); 1594 break; 1595 } 1596 1597 case BC_LEX_KW_ELSE: 1598 { 1599 bc_parse_else(p); 1600 break; 1601 } 1602 1603 // Just eat. 1604 case BC_LEX_SCOLON: 1605 { 1606 // Do nothing. 1607 break; 1608 } 1609 1610 case BC_LEX_RBRACE: 1611 { 1612 bc_parse_endBody(p, true); 1613 break; 1614 } 1615 1616 case BC_LEX_STR: 1617 { 1618 bc_parse_str(p, BC_INST_PRINT_STR); 1619 break; 1620 } 1621 1622 case BC_LEX_KW_BREAK: 1623 case BC_LEX_KW_CONTINUE: 1624 { 1625 bc_parse_loopExit(p, p->l.t); 1626 break; 1627 } 1628 1629 case BC_LEX_KW_FOR: 1630 { 1631 bc_parse_for(p); 1632 break; 1633 } 1634 1635 case BC_LEX_KW_HALT: 1636 { 1637 bc_parse_push(p, BC_INST_HALT); 1638 bc_lex_next(&p->l); 1639 break; 1640 } 1641 1642 case BC_LEX_KW_IF: 1643 { 1644 bc_parse_if(p); 1645 break; 1646 } 1647 1648 case BC_LEX_KW_LIMITS: 1649 { 1650 // `limits` is a compile-time command, so execute it right away. 1651 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT); 1652 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS); 1653 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW); 1654 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX); 1655 bc_vm_printf("\n"); 1656 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE); 1657 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM); 1658 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE); 1659 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING); 1660 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME); 1661 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM); 1662 #if BC_ENABLE_EXTRA_MATH 1663 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND); 1664 #endif // BC_ENABLE_EXTRA_MATH 1665 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP); 1666 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS); 1667 1668 bc_lex_next(&p->l); 1669 1670 break; 1671 } 1672 1673 case BC_LEX_KW_STREAM: 1674 case BC_LEX_KW_PRINT: 1675 { 1676 bc_parse_print(p, type); 1677 break; 1678 } 1679 1680 case BC_LEX_KW_QUIT: 1681 { 1682 // Quit is a compile-time command. We don't exit directly, so the vm 1683 // can clean up. 1684 vm.status = BC_STATUS_QUIT; 1685 BC_JMP; 1686 break; 1687 } 1688 1689 case BC_LEX_KW_RETURN: 1690 { 1691 bc_parse_return(p); 1692 break; 1693 } 1694 1695 case BC_LEX_KW_WHILE: 1696 { 1697 bc_parse_while(p); 1698 break; 1699 } 1700 1701 default: 1702 { 1703 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1704 } 1705 } 1706 1707 // If the flags did not change, we expect a delimiter. 1708 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) { 1709 if (BC_ERR(!bc_parse_isDelimiter(p))) 1710 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1711 } 1712 1713 // Make sure semicolons are eaten. 1714 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l); 1715 } 1716 1717 void bc_parse_parse(BcParse *p) { 1718 1719 assert(p); 1720 1721 BC_SETJMP(exit); 1722 1723 // We should not let an EOF get here unless some partial parse was not 1724 // completed, in which case, it's the user's fault. 1725 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF); 1726 1727 // Functions need special parsing. 1728 else if (p->l.t == BC_LEX_KW_DEFINE) { 1729 if (BC_ERR(BC_PARSE_NO_EXEC(p))) 1730 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1731 bc_parse_func(p); 1732 } 1733 1734 // Otherwise, parse a normal statement. 1735 else bc_parse_stmt(p); 1736 1737 exit: 1738 1739 BC_SIG_MAYLOCK; 1740 1741 // We need to reset on error. 1742 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig))) 1743 bc_parse_reset(p); 1744 1745 BC_LONGJMP_CONT; 1746 } 1747 1748 /** 1749 * Parse an expression. This is the actual implementation of the Shunting-Yard 1750 * Algorithm. 1751 * @param p The parser. 1752 * @param flags The flags for what is valid in the expression. 1753 * @param next A set of tokens for what is valid *after* the expression. 1754 * @return A parse status. In some places, an empty expression is an 1755 * error, and sometimes, it is required. This allows this function 1756 * to tell the caller if the expression was empty and let the 1757 * caller handle it. 1758 */ 1759 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags, 1760 BcParseNext next) 1761 { 1762 BcInst prev = BC_INST_PRINT; 1763 uchar inst = BC_INST_INVALID; 1764 BcLexType top, t; 1765 size_t nexprs, ops_bgn; 1766 uint32_t i, nparens, nrelops; 1767 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign; 1768 1769 // One of these *must* be true. 1770 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL)); 1771 1772 // These are set very carefully. In fact, controlling the values of these 1773 // locals is the biggest part of making this work. ops_bgn especially is 1774 // important because it marks where the operator stack begins for *this* 1775 // invocation of this function. That's because bc_parse_expr_err() is 1776 // recursive (the Shunting-Yard Algorithm is most easily expressed 1777 // recursively when parsing subexpressions), and each invocation needs to 1778 // know where to stop. 1779 // 1780 // - nparens is the number of left parens without matches. 1781 // - nrelops is the number of relational operators that appear in the expr. 1782 // - nexprs is the number of unused expressions. 1783 // - rprn is a right paren encountered last. 1784 // - done means the expression has been fully parsed. 1785 // - get_token is true when a token is needed at the end of an iteration. 1786 // - assign is true when an assignment statement was parsed last. 1787 // - incdec is true when the previous operator was an inc or dec operator. 1788 // - can_assign is true when an assignemnt is valid. 1789 // - bin_last is true when the previous instruction was a binary operator. 1790 t = p->l.t; 1791 pfirst = (p->l.t == BC_LEX_LPAREN); 1792 nparens = nrelops = 0; 1793 nexprs = 0; 1794 ops_bgn = p->ops.len; 1795 rprn = done = get_token = assign = incdec = can_assign = false; 1796 bin_last = true; 1797 1798 // We want to eat newlines if newlines are not a valid ending token. 1799 // This is for spacing in things like for loop headers. 1800 if (!(flags & BC_PARSE_NOREAD)) { 1801 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l); 1802 } 1803 1804 // This is the Shunting-Yard algorithm loop. 1805 for (; !done && BC_PARSE_EXPR(t); t = p->l.t) 1806 { 1807 switch (t) { 1808 1809 case BC_LEX_OP_INC: 1810 case BC_LEX_OP_DEC: 1811 { 1812 // These operators can only be used with items that can be 1813 // assigned to. 1814 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1815 1816 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags); 1817 1818 rprn = get_token = bin_last = false; 1819 incdec = true; 1820 flags &= ~(BC_PARSE_ARRAY); 1821 1822 break; 1823 } 1824 1825 #if BC_ENABLE_EXTRA_MATH 1826 case BC_LEX_OP_TRUNC: 1827 { 1828 // The previous token must have been a leaf expression, or the 1829 // operator is in the wrong place. 1830 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn))) 1831 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 1832 1833 // I can just add the instruction because 1834 // negative will already be taken care of. 1835 bc_parse_push(p, BC_INST_TRUNC); 1836 1837 rprn = can_assign = incdec = false; 1838 get_token = true; 1839 flags &= ~(BC_PARSE_ARRAY); 1840 1841 break; 1842 } 1843 #endif // BC_ENABLE_EXTRA_MATH 1844 1845 case BC_LEX_OP_MINUS: 1846 { 1847 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs); 1848 1849 rprn = get_token = can_assign = false; 1850 1851 // This is true if it was a binary operator last. 1852 bin_last = (prev == BC_INST_MINUS); 1853 if (bin_last) incdec = false; 1854 1855 flags &= ~(BC_PARSE_ARRAY); 1856 1857 break; 1858 } 1859 1860 // All of this group, including the fallthrough, is to parse binary 1861 // operators. 1862 case BC_LEX_OP_ASSIGN_POWER: 1863 case BC_LEX_OP_ASSIGN_MULTIPLY: 1864 case BC_LEX_OP_ASSIGN_DIVIDE: 1865 case BC_LEX_OP_ASSIGN_MODULUS: 1866 case BC_LEX_OP_ASSIGN_PLUS: 1867 case BC_LEX_OP_ASSIGN_MINUS: 1868 #if BC_ENABLE_EXTRA_MATH 1869 case BC_LEX_OP_ASSIGN_PLACES: 1870 case BC_LEX_OP_ASSIGN_LSHIFT: 1871 case BC_LEX_OP_ASSIGN_RSHIFT: 1872 #endif // BC_ENABLE_EXTRA_MATH 1873 case BC_LEX_OP_ASSIGN: 1874 { 1875 // We need to make sure the assignment is valid. 1876 if (!BC_PARSE_INST_VAR(prev)) 1877 bc_parse_err(p, BC_ERR_PARSE_ASSIGN); 1878 } 1879 // Fallthrough. 1880 BC_FALLTHROUGH 1881 1882 case BC_LEX_OP_POWER: 1883 case BC_LEX_OP_MULTIPLY: 1884 case BC_LEX_OP_DIVIDE: 1885 case BC_LEX_OP_MODULUS: 1886 case BC_LEX_OP_PLUS: 1887 #if BC_ENABLE_EXTRA_MATH 1888 case BC_LEX_OP_PLACES: 1889 case BC_LEX_OP_LSHIFT: 1890 case BC_LEX_OP_RSHIFT: 1891 #endif // BC_ENABLE_EXTRA_MATH 1892 case BC_LEX_OP_REL_EQ: 1893 case BC_LEX_OP_REL_LE: 1894 case BC_LEX_OP_REL_GE: 1895 case BC_LEX_OP_REL_NE: 1896 case BC_LEX_OP_REL_LT: 1897 case BC_LEX_OP_REL_GT: 1898 case BC_LEX_OP_BOOL_NOT: 1899 case BC_LEX_OP_BOOL_OR: 1900 case BC_LEX_OP_BOOL_AND: 1901 { 1902 // This is true if the operator if the token is a prefix 1903 // operator. This is only for boolean not. 1904 if (BC_PARSE_OP_PREFIX(t)) { 1905 1906 // Prefix operators are only allowed after binary operators 1907 // or prefix operators. 1908 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last))) 1909 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1910 } 1911 // If we execute the else, that means we have a binary operator. 1912 // If the previous operator was a prefix or a binary operator, 1913 // then a binary operator is not allowed. 1914 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last)) 1915 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1916 1917 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT); 1918 prev = BC_PARSE_TOKEN_INST(t); 1919 1920 bc_parse_operator(p, t, ops_bgn, &nexprs); 1921 1922 rprn = incdec = can_assign = false; 1923 get_token = true; 1924 bin_last = !BC_PARSE_OP_PREFIX(t); 1925 flags &= ~(BC_PARSE_ARRAY); 1926 1927 break; 1928 } 1929 1930 case BC_LEX_LPAREN: 1931 { 1932 // A left paren is *not* allowed right after a leaf expr. 1933 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 1934 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1935 1936 nparens += 1; 1937 rprn = incdec = can_assign = false; 1938 get_token = true; 1939 1940 // Push the paren onto the operator stack. 1941 bc_vec_push(&p->ops, &t); 1942 1943 break; 1944 } 1945 1946 case BC_LEX_RPAREN: 1947 { 1948 // This needs to be a status. The error is handled in 1949 // bc_parse_expr_status(). 1950 if (BC_ERR(p->l.last == BC_LEX_LPAREN)) 1951 return BC_PARSE_STATUS_EMPTY_EXPR; 1952 1953 // The right paren must not come after a prefix or binary 1954 // operator. 1955 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev))) 1956 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1957 1958 // If there are no parens left, we are done, but we need another 1959 // token. 1960 if (!nparens) { 1961 done = true; 1962 get_token = false; 1963 break; 1964 } 1965 1966 nparens -= 1; 1967 rprn = true; 1968 get_token = bin_last = incdec = false; 1969 1970 bc_parse_rightParen(p, &nexprs); 1971 1972 break; 1973 } 1974 1975 case BC_LEX_STR: 1976 { 1977 // POSIX only allows strings alone. 1978 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING); 1979 1980 // A string is a leaf and cannot come right after a leaf. 1981 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 1982 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1983 1984 bc_parse_addString(p); 1985 1986 get_token = true; 1987 bin_last = rprn = false; 1988 nexprs += 1; 1989 1990 break; 1991 } 1992 1993 case BC_LEX_NAME: 1994 { 1995 // A name is a leaf and cannot come right after a leaf. 1996 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 1997 bc_parse_err(p, BC_ERR_PARSE_EXPR); 1998 1999 get_token = bin_last = false; 2000 2001 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL); 2002 2003 rprn = (prev == BC_INST_CALL); 2004 nexprs += 1; 2005 flags &= ~(BC_PARSE_ARRAY); 2006 2007 break; 2008 } 2009 2010 case BC_LEX_NUMBER: 2011 { 2012 // A number is a leaf and cannot come right after a leaf. 2013 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2014 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2015 2016 // The number instruction is pushed in here. 2017 bc_parse_number(p); 2018 2019 nexprs += 1; 2020 prev = BC_INST_NUM; 2021 get_token = true; 2022 rprn = bin_last = can_assign = false; 2023 flags &= ~(BC_PARSE_ARRAY); 2024 2025 break; 2026 } 2027 2028 case BC_LEX_KW_IBASE: 2029 case BC_LEX_KW_LAST: 2030 case BC_LEX_KW_OBASE: 2031 #if BC_ENABLE_EXTRA_MATH 2032 case BC_LEX_KW_SEED: 2033 #endif // BC_ENABLE_EXTRA_MATH 2034 { 2035 // All of these are leaves and cannot come right after a leaf. 2036 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2037 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2038 2039 prev = t - BC_LEX_KW_LAST + BC_INST_LAST; 2040 bc_parse_push(p, prev); 2041 2042 get_token = can_assign = true; 2043 rprn = bin_last = false; 2044 nexprs += 1; 2045 flags &= ~(BC_PARSE_ARRAY); 2046 2047 break; 2048 } 2049 2050 case BC_LEX_KW_LENGTH: 2051 case BC_LEX_KW_SQRT: 2052 case BC_LEX_KW_ABS: 2053 #if BC_ENABLE_EXTRA_MATH 2054 case BC_LEX_KW_IRAND: 2055 #endif // BC_ENABLE_EXTRA_MATH 2056 case BC_LEX_KW_ASCIIFY: 2057 { 2058 // All of these are leaves and cannot come right after a leaf. 2059 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2060 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2061 2062 bc_parse_builtin(p, t, flags, &prev); 2063 2064 rprn = get_token = bin_last = incdec = can_assign = false; 2065 nexprs += 1; 2066 flags &= ~(BC_PARSE_ARRAY); 2067 2068 break; 2069 } 2070 2071 case BC_LEX_KW_READ: 2072 #if BC_ENABLE_EXTRA_MATH 2073 case BC_LEX_KW_RAND: 2074 #endif // BC_ENABLE_EXTRA_MATH 2075 case BC_LEX_KW_MAXIBASE: 2076 case BC_LEX_KW_MAXOBASE: 2077 case BC_LEX_KW_MAXSCALE: 2078 #if BC_ENABLE_EXTRA_MATH 2079 case BC_LEX_KW_MAXRAND: 2080 #endif // BC_ENABLE_EXTRA_MATH 2081 { 2082 // All of these are leaves and cannot come right after a leaf. 2083 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2084 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2085 2086 // Error if we have read and it's not allowed. 2087 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD)) 2088 bc_parse_err(p, BC_ERR_EXEC_REC_READ); 2089 2090 prev = t - BC_LEX_KW_READ + BC_INST_READ; 2091 bc_parse_noArgBuiltin(p, prev); 2092 2093 rprn = get_token = bin_last = incdec = can_assign = false; 2094 nexprs += 1; 2095 flags &= ~(BC_PARSE_ARRAY); 2096 2097 break; 2098 } 2099 2100 case BC_LEX_KW_SCALE: 2101 { 2102 // This is a leaf and cannot come right after a leaf. 2103 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2104 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2105 2106 // Scale needs special work because it can be a variable *or* a 2107 // function. 2108 bc_parse_scale(p, &prev, &can_assign, flags); 2109 2110 rprn = get_token = bin_last = false; 2111 nexprs += 1; 2112 flags &= ~(BC_PARSE_ARRAY); 2113 2114 break; 2115 } 2116 2117 case BC_LEX_KW_MODEXP: 2118 case BC_LEX_KW_DIVMOD: 2119 { 2120 // This is a leaf and cannot come right after a leaf. 2121 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn))) 2122 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2123 2124 bc_parse_builtin3(p, t, flags, &prev); 2125 2126 rprn = get_token = bin_last = incdec = can_assign = false; 2127 nexprs += 1; 2128 flags &= ~(BC_PARSE_ARRAY); 2129 2130 break; 2131 } 2132 2133 default: 2134 { 2135 #ifndef NDEBUG 2136 // We should never get here, even in debug builds. 2137 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 2138 break; 2139 #endif // NDEBUG 2140 } 2141 } 2142 2143 if (get_token) bc_lex_next(&p->l); 2144 } 2145 2146 // Now that we have parsed the expression, we need to empty the operator 2147 // stack. 2148 while (p->ops.len > ops_bgn) { 2149 2150 top = BC_PARSE_TOP_OP(p); 2151 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN; 2152 2153 // There should not be *any* parens on the stack anymore. 2154 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN)) 2155 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2156 2157 bc_parse_push(p, BC_PARSE_TOKEN_INST(top)); 2158 2159 // Adjust the number of unused expressions. 2160 nexprs -= !BC_PARSE_OP_PREFIX(top); 2161 bc_vec_pop(&p->ops); 2162 2163 incdec = false; 2164 } 2165 2166 // There must be only one expression at the top. 2167 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR); 2168 2169 // Check that the next token is correct. 2170 for (i = 0; i < next.len && t != next.tokens[i]; ++i); 2171 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p))) 2172 bc_parse_err(p, BC_ERR_PARSE_EXPR); 2173 2174 // Check that POSIX would be happy with the number of relational operators. 2175 if (!(flags & BC_PARSE_REL) && nrelops) 2176 bc_parse_err(p, BC_ERR_POSIX_REL_POS); 2177 else if ((flags & BC_PARSE_REL) && nrelops > 1) 2178 bc_parse_err(p, BC_ERR_POSIX_MULTIREL); 2179 2180 // If this is true, then we might be in a situation where we don't print. 2181 // We would want to have the increment/decrement operator not make an extra 2182 // copy if it's not necessary. 2183 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) { 2184 2185 // We have the easy case if the last operator was an assignment 2186 // operator. 2187 if (assign) { 2188 inst = *((uchar*) bc_vec_top(&p->func->code)); 2189 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER); 2190 incdec = false; 2191 } 2192 // If we have an inc/dec operator and we are *not* printing, implement 2193 // the optimization to get rid of the extra copy. 2194 else if (incdec && !(flags & BC_PARSE_PRINT)) { 2195 inst = *((uchar*) bc_vec_top(&p->func->code)); 2196 incdec = (inst <= BC_INST_DEC); 2197 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC && 2198 inst != BC_INST_ASSIGN_PLUS); 2199 } 2200 2201 // This condition allows us to change the previous assignment 2202 // instruction (which does a copy) for a NO_VAL version, which does not. 2203 // This condition is set if either of the above if statements ends up 2204 // being true. 2205 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL && 2206 inst <= BC_INST_ASSIGN_NO_VAL) 2207 { 2208 // Pop the previous assignment instruction and push a new one. 2209 // Inc/dec needs the extra instruction because it is now a binary 2210 // operator and needs a second operand. 2211 bc_vec_pop(&p->func->code); 2212 if (incdec) bc_parse_push(p, BC_INST_ONE); 2213 bc_parse_push(p, inst); 2214 } 2215 } 2216 2217 // If we might have to print... 2218 if ((flags & BC_PARSE_PRINT)) { 2219 2220 // With a paren first or the last operator not being an assignment, we 2221 // *do* want to print. 2222 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT); 2223 } 2224 // We need to make sure to push a pop instruction for assignment statements 2225 // that will not print. The print will pop, but without it, we need to pop. 2226 else if (!(flags & BC_PARSE_NEEDVAL) && 2227 (inst < BC_INST_ASSIGN_POWER_NO_VAL || 2228 inst > BC_INST_ASSIGN_NO_VAL)) 2229 { 2230 bc_parse_push(p, BC_INST_POP); 2231 } 2232 2233 // We want to eat newlines if newlines are not a valid ending token. 2234 // This is for spacing in things like for loop headers. 2235 // 2236 // Yes, this is one case where I reuse a variable for a different purpose; 2237 // in this case, incdec being true now means that newlines are not valid. 2238 for (incdec = true, i = 0; i < next.len && incdec; ++i) 2239 incdec = (next.tokens[i] != BC_LEX_NLINE); 2240 if (incdec) { 2241 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l); 2242 } 2243 2244 return BC_PARSE_STATUS_SUCCESS; 2245 } 2246 2247 /** 2248 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets 2249 * an empty expression. 2250 * @param p The parser. 2251 * @param flags The flags for what is valid in the expression. 2252 * @param next A set of tokens for what is valid *after* the expression. 2253 */ 2254 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) { 2255 2256 BcParseStatus s = bc_parse_expr_err(p, flags, next); 2257 2258 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR)) 2259 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR); 2260 } 2261 2262 void bc_parse_expr(BcParse *p, uint8_t flags) { 2263 assert(p); 2264 bc_parse_expr_status(p, flags, bc_parse_next_read); 2265 } 2266 #endif // BC_ENABLED 2267