1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for program data. 33 * 34 */ 35 36 #ifndef BC_LANG_H 37 #define BC_LANG_H 38 39 #include <stdbool.h> 40 41 // These have to come first to silence a warning on BC_C11 below. 42 #include <status.h> 43 #include <vector.h> 44 #include <num.h> 45 46 #if BC_C11 47 #include <assert.h> 48 #endif // BC_C11 49 50 /// The instructions for bytecode. 51 typedef enum BcInst 52 { 53 #if BC_ENABLED 54 /// Postfix increment and decrement. Prefix are translated into 55 /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. 56 BC_INST_INC = 0, 57 BC_INST_DEC, 58 #endif // BC_ENABLED 59 60 /// Unary negation. 61 BC_INST_NEG, 62 63 /// Boolean not. 64 BC_INST_BOOL_NOT, 65 66 #if BC_ENABLE_EXTRA_MATH 67 /// Truncation operator. 68 BC_INST_TRUNC, 69 #endif // BC_ENABLE_EXTRA_MATH 70 71 /// These should be self-explanatory. 72 BC_INST_POWER, 73 BC_INST_MULTIPLY, 74 BC_INST_DIVIDE, 75 BC_INST_MODULUS, 76 BC_INST_PLUS, 77 BC_INST_MINUS, 78 79 #if BC_ENABLE_EXTRA_MATH 80 /// Places operator. 81 BC_INST_PLACES, 82 83 /// Shift operators. 84 BC_INST_LSHIFT, 85 BC_INST_RSHIFT, 86 #endif // BC_ENABLE_EXTRA_MATH 87 88 /// Comparison operators. 89 BC_INST_REL_EQ, 90 BC_INST_REL_LE, 91 BC_INST_REL_GE, 92 BC_INST_REL_NE, 93 BC_INST_REL_LT, 94 BC_INST_REL_GT, 95 96 /// Boolean or and and. 97 BC_INST_BOOL_OR, 98 BC_INST_BOOL_AND, 99 100 #if BC_ENABLED 101 /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. 102 BC_INST_ASSIGN_POWER, 103 BC_INST_ASSIGN_MULTIPLY, 104 BC_INST_ASSIGN_DIVIDE, 105 BC_INST_ASSIGN_MODULUS, 106 BC_INST_ASSIGN_PLUS, 107 BC_INST_ASSIGN_MINUS, 108 #if BC_ENABLE_EXTRA_MATH 109 /// Places and shift assignment operators. 110 BC_INST_ASSIGN_PLACES, 111 BC_INST_ASSIGN_LSHIFT, 112 BC_INST_ASSIGN_RSHIFT, 113 #endif // BC_ENABLE_EXTRA_MATH 114 115 /// Normal assignment. 116 BC_INST_ASSIGN, 117 118 /// bc and dc detect when the value from an assignment is not necessary. 119 /// For example, a plain assignment statement means the value is never used. 120 /// In those cases, we can get lots of performance back by not even creating 121 /// a copy at all. In fact, it saves a copy, a push onto the results stack, 122 /// a pop from the results stack, and a free. Definitely worth it to detect. 123 BC_INST_ASSIGN_POWER_NO_VAL, 124 BC_INST_ASSIGN_MULTIPLY_NO_VAL, 125 BC_INST_ASSIGN_DIVIDE_NO_VAL, 126 BC_INST_ASSIGN_MODULUS_NO_VAL, 127 BC_INST_ASSIGN_PLUS_NO_VAL, 128 BC_INST_ASSIGN_MINUS_NO_VAL, 129 #if BC_ENABLE_EXTRA_MATH 130 /// Same as above. 131 BC_INST_ASSIGN_PLACES_NO_VAL, 132 BC_INST_ASSIGN_LSHIFT_NO_VAL, 133 BC_INST_ASSIGN_RSHIFT_NO_VAL, 134 #endif // BC_ENABLE_EXTRA_MATH 135 #endif // BC_ENABLED 136 137 /// Normal assignment that pushes no value on the stack. 138 BC_INST_ASSIGN_NO_VAL, 139 140 /// Push a constant onto the results stack. 141 BC_INST_NUM, 142 143 /// Push a variable onto the results stack. 144 BC_INST_VAR, 145 146 /// Push an array element onto the results stack. 147 BC_INST_ARRAY_ELEM, 148 149 /// Push an array onto the results stack. This is different from pushing an 150 /// array *element* onto the results stack; it pushes a reference to the 151 /// whole array. This is needed in bc for function arguments that are 152 /// arrays. It is also needed for returning the length of an array. 153 BC_INST_ARRAY, 154 155 /// Push a zero or a one onto the stack. These are special cased because it 156 /// does help performance, particularly for one since inc/dec operators 157 /// use it. 158 BC_INST_ZERO, 159 BC_INST_ONE, 160 161 #if BC_ENABLED 162 /// Push the last printed value onto the stack. 163 BC_INST_LAST, 164 #endif // BC_ENABLED 165 166 /// Push the value of any of the globals onto the stack. 167 BC_INST_IBASE, 168 BC_INST_OBASE, 169 BC_INST_SCALE, 170 171 #if BC_ENABLE_EXTRA_MATH 172 /// Push the value of the seed global onto the stack. 173 BC_INST_SEED, 174 #endif // BC_ENABLE_EXTRA_MATH 175 176 /// These are builtin functions. 177 BC_INST_LENGTH, 178 BC_INST_SCALE_FUNC, 179 BC_INST_SQRT, 180 BC_INST_ABS, 181 BC_INST_IS_NUMBER, 182 BC_INST_IS_STRING, 183 184 #if BC_ENABLE_EXTRA_MATH 185 /// Another builtin function. 186 BC_INST_IRAND, 187 #endif // BC_ENABLE_EXTRA_MATH 188 189 /// Asciify. 190 BC_INST_ASCIIFY, 191 192 /// Another builtin function. 193 BC_INST_READ, 194 195 #if BC_ENABLE_EXTRA_MATH 196 /// Another builtin function. 197 BC_INST_RAND, 198 #endif // BC_ENABLE_EXTRA_MATH 199 200 /// Return the max for the various globals. 201 BC_INST_MAXIBASE, 202 BC_INST_MAXOBASE, 203 BC_INST_MAXSCALE, 204 #if BC_ENABLE_EXTRA_MATH 205 /// Return the max value returned by rand(). 206 BC_INST_MAXRAND, 207 #endif // BC_ENABLE_EXTRA_MATH 208 209 /// bc line_length() builtin function. 210 BC_INST_LINE_LENGTH, 211 212 #if BC_ENABLED 213 214 /// bc global_stacks() builtin function. 215 BC_INST_GLOBAL_STACKS, 216 217 #endif // BC_ENABLED 218 219 /// bc leading_zero() builtin function. 220 BC_INST_LEADING_ZERO, 221 222 /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. 223 /// dc uses this instruction to print, but not pop. That's valid in dc. 224 /// However, in bc, it is *never* valid to print without popping. In bc, 225 /// BC_INST_PRINT_POP is used to indicate when a string should be printed 226 /// because of a print statement or whether it should be printed raw. The 227 /// reason for this is because a print statement handles escaped characters. 228 /// So BC_INST_PRINT_POP is for printing a string from a print statement, 229 /// BC_INST_PRINT_STR is for printing a string by itself. 230 /// 231 /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. 232 /// 233 /// Oh, and BC_INST_STR pushes a string onto the results stack. 234 BC_INST_PRINT, 235 BC_INST_PRINT_POP, 236 BC_INST_STR, 237 #if BC_ENABLED 238 BC_INST_PRINT_STR, 239 240 /// Jumps unconditionally. 241 BC_INST_JUMP, 242 243 /// Jumps if the top of the results stack is zero (condition failed). It 244 /// turns out that we only want to jump when conditions fail to "skip" code. 245 BC_INST_JUMP_ZERO, 246 247 /// Call a function. 248 BC_INST_CALL, 249 250 /// Return the top of the stack to the caller. 251 BC_INST_RET, 252 253 /// Return 0 to the caller. 254 BC_INST_RET0, 255 256 /// Special return instruction for void functions. 257 BC_INST_RET_VOID, 258 259 /// Special halt instruction. 260 BC_INST_HALT, 261 #endif // BC_ENABLED 262 263 /// Pop an item off of the results stack. 264 BC_INST_POP, 265 266 /// Swaps the top two items on the results stack. 267 BC_INST_SWAP, 268 269 /// Modular exponentiation. 270 BC_INST_MODEXP, 271 272 /// Do divide and modulus at the same time. 273 BC_INST_DIVMOD, 274 275 /// Turns a number into a string and prints it. 276 BC_INST_PRINT_STREAM, 277 278 #if DC_ENABLED 279 280 /// dc's return; it pops an executing string off of the stack. 281 BC_INST_POP_EXEC, 282 283 /// Unconditionally execute a string. 284 BC_INST_EXECUTE, 285 286 /// Conditionally execute a string. 287 BC_INST_EXEC_COND, 288 289 /// Prints each item on the results stack, separated by newlines. 290 BC_INST_PRINT_STACK, 291 292 /// Pops everything off of the results stack. 293 BC_INST_CLEAR_STACK, 294 295 /// Pushes the current length of a register stack onto the results stack. 296 BC_INST_REG_STACK_LEN, 297 298 /// Pushes the current length of the results stack onto the results stack. 299 BC_INST_STACK_LEN, 300 301 /// Pushes a copy of the item on the top of the results stack onto the 302 /// results stack. 303 BC_INST_DUPLICATE, 304 305 /// Copies the value in a register and pushes the copy onto the results 306 /// stack. 307 BC_INST_LOAD, 308 309 /// Pops an item off of a register stack and pushes it onto the results 310 /// stack. 311 BC_INST_PUSH_VAR, 312 313 /// Pops an item off of the results stack and pushes it onto a register's 314 /// stack. 315 BC_INST_PUSH_TO_VAR, 316 317 /// Quit. 318 BC_INST_QUIT, 319 320 /// Quit executing some number of strings. 321 BC_INST_NQUIT, 322 323 /// Push the depth of the execution stack onto the stack. 324 BC_INST_EXEC_STACK_LEN, 325 326 #endif // DC_ENABLED 327 328 /// Invalid instruction. 329 BC_INST_INVALID, 330 331 } BcInst; 332 333 #if BC_C11 334 _Static_assert(BC_INST_INVALID <= UCHAR_MAX, 335 "Too many instructions to fit into an unsigned char"); 336 #endif // BC_C11 337 338 /// Used by maps to identify where items are in the array. 339 typedef struct BcId 340 { 341 /// The name of the item. 342 char* name; 343 344 /// The index into the array where the item is. 345 size_t idx; 346 347 } BcId; 348 349 /// The location of a var, array, or array element. 350 typedef struct BcLoc 351 { 352 /// The index of the var or array. 353 size_t loc; 354 355 /// The index of the array or variable in the array stack. This is to 356 /// prevent a bug with getting the wrong array element or variable after a 357 /// function call. See the tests/bc/scripts/array.bc test for the array 358 /// case; the variable case is in various variable tests. 359 size_t stack_idx; 360 361 /// The index of the array element. Only used for array elements. 362 size_t idx; 363 364 } BcLoc; 365 366 /// An entry for a constant. 367 typedef struct BcConst 368 { 369 /// The original string as parsed from the source code. 370 char* val; 371 372 /// The last base that the constant was parsed in. 373 BcBigDig base; 374 375 /// The parsed constant. 376 BcNum num; 377 378 } BcConst; 379 380 /// A function. This is also used in dc, not just bc. The reason is that strings 381 /// are executed in dc, and they are converted to functions in order to be 382 /// executed. 383 typedef struct BcFunc 384 { 385 /// The bytecode instructions. 386 BcVec code; 387 388 #if BC_ENABLED 389 390 /// The labels. This is a vector of indices. The index is the index into 391 /// the bytecode vector where the label is. 392 BcVec labels; 393 394 /// The autos for the function. The first items are the parameters, and the 395 /// arguments to the parameters must match the types in this vector. 396 BcVec autos; 397 398 /// The number of parameters the function takes. 399 size_t nparams; 400 401 #endif // BC_ENABLED 402 403 /// The function's name. 404 const char* name; 405 406 #if BC_ENABLED 407 /// True if the function is a void function. 408 bool voidfn; 409 #endif // BC_ENABLED 410 411 } BcFunc; 412 413 /// Types of results that can be pushed onto the results stack. 414 typedef enum BcResultType 415 { 416 /// Result is a variable. 417 BC_RESULT_VAR, 418 419 /// Result is an array element. 420 BC_RESULT_ARRAY_ELEM, 421 422 /// Result is an array. This is only allowed for function arguments or 423 /// returning the length of the array. 424 BC_RESULT_ARRAY, 425 426 /// Result is a string. 427 BC_RESULT_STR, 428 429 /// Result is a temporary. This is used for the result of almost all 430 /// expressions. 431 BC_RESULT_TEMP, 432 433 /// Special casing the two below gave performance improvements. 434 435 /// Result is a 0. 436 BC_RESULT_ZERO, 437 438 /// Result is a 1. Useful for inc/dec operators. 439 BC_RESULT_ONE, 440 441 #if BC_ENABLED 442 443 /// Result is the special "last" variable. 444 BC_RESULT_LAST, 445 446 /// Result is the return value of a void function. 447 BC_RESULT_VOID, 448 #endif // BC_ENABLED 449 450 /// Result is the value of ibase. 451 BC_RESULT_IBASE, 452 453 /// Result is the value of obase. 454 BC_RESULT_OBASE, 455 456 /// Result is the value of scale. 457 BC_RESULT_SCALE, 458 459 #if BC_ENABLE_EXTRA_MATH 460 461 /// Result is the value of seed. 462 BC_RESULT_SEED, 463 464 #endif // BC_ENABLE_EXTRA_MATH 465 466 } BcResultType; 467 468 /// A union to store data for various result types. 469 typedef union BcResultData 470 { 471 /// A number. Strings are stored here too; they are numbers with 472 /// cap == 0 && num == NULL. The string's index into the strings vector is 473 /// stored in the scale field. But this is only used for strings stored in 474 /// variables. 475 BcNum n; 476 477 /// A vector. 478 BcVec v; 479 480 /// A variable, array, or array element reference. This could also be a 481 /// string if a string is not stored in a variable (dc only). 482 BcLoc loc; 483 484 } BcResultData; 485 486 /// A tagged union for results. 487 typedef struct BcResult 488 { 489 /// The tag. The type of the result. 490 BcResultType t; 491 492 /// The data. The data for the result. 493 BcResultData d; 494 495 } BcResult; 496 497 /// An instruction pointer. This is how bc knows where in the bytecode vector, 498 /// and which function, the current execution is. 499 typedef struct BcInstPtr 500 { 501 /// The index of the currently executing function in the fns vector. 502 size_t func; 503 504 /// The index into the bytecode vector of the *next* instruction. 505 size_t idx; 506 507 /// The length of the results vector when this function started executing. 508 /// This is mostly used for bc where functions should not affect the results 509 /// of their callers. 510 size_t len; 511 512 } BcInstPtr; 513 514 /// Types of identifiers. 515 typedef enum BcType 516 { 517 /// Variable. 518 BC_TYPE_VAR, 519 520 /// Array. 521 BC_TYPE_ARRAY, 522 523 #if BC_ENABLED 524 525 /// Array reference. 526 BC_TYPE_REF, 527 528 #endif // BC_ENABLED 529 530 } BcType; 531 532 #if BC_ENABLED 533 /// An auto variable in bc. 534 typedef struct BcAuto 535 { 536 /// The index of the variable in the vars or arrs vectors. 537 size_t idx; 538 539 /// The type of the variable. 540 BcType type; 541 542 } BcAuto; 543 #endif // BC_ENABLED 544 545 /// Forward declaration. 546 struct BcProgram; 547 548 /** 549 * Initializes a function. 550 * @param f The function to initialize. 551 * @param name The name of the function. The string is assumed to be owned by 552 * some other entity. 553 */ 554 void 555 bc_func_init(BcFunc* f, const char* name); 556 557 /** 558 * Inserts an auto into the function. 559 * @param f The function to insert into. 560 * @param p The program. This is to search for the variable or array name. 561 * @param name The name of the auto to insert. 562 * @param type The type of the auto. 563 * @param line The line in the source code where the insert happened. This is 564 * solely for error reporting. 565 */ 566 void 567 bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type, 568 size_t line); 569 570 /** 571 * Resets a function in preparation for it to be reused. This can happen in bc 572 * because it is a dynamic language and functions can be redefined. 573 * @param f The functio to reset. 574 */ 575 void 576 bc_func_reset(BcFunc* f); 577 578 #ifndef NDEBUG 579 /** 580 * Frees a function. This is a destructor. This is only used in debug builds 581 * because all functions are freed at exit. We free them in debug builds to 582 * check for memory leaks. 583 * @param func The function to free as a void pointer. 584 */ 585 void 586 bc_func_free(void* func); 587 #endif // NDEBUG 588 589 /** 590 * Initializes an array, which is the array type in bc and dc source code. Since 591 * variables and arrays are both arrays (see the development manual, 592 * manuals/development.md#execution, for more information), the @a nums 593 * parameter tells bc whether to initialize an array of numbers or an array of 594 * arrays of numbers. If the latter, it does a recursive call with nums set to 595 * true. 596 * @param a The array to initialize. 597 * @param nums True if the array should be for numbers, false if it should be 598 * for vectors. 599 */ 600 void 601 bc_array_init(BcVec* a, bool nums); 602 603 /** 604 * Copies an array to another array. This is used to do pass arrays to functions 605 * that do not take references to arrays. The arrays are passed entirely by 606 * value, which means that they need to be copied. 607 * @param d The destination array. 608 * @param s The source array. 609 */ 610 void 611 bc_array_copy(BcVec* d, const BcVec* s); 612 613 /** 614 * Frees a string stored in a function. This is a destructor. 615 * @param string The string to free as a void pointer. 616 */ 617 void 618 bc_string_free(void* string); 619 620 /** 621 * Frees a constant stored in a function. This is a destructor. 622 * @param constant The constant to free as a void pointer. 623 */ 624 void 625 bc_const_free(void* constant); 626 627 /** 628 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by 629 * clearing the BcNum in the union. This is to ensure that bc does not use 630 * uninitialized data. 631 * @param r The result to clear. 632 */ 633 void 634 bc_result_clear(BcResult* r); 635 636 /** 637 * Copies a result into another. This is done for things like duplicating the 638 * top of the results stack or copying the result of an assignment to put back 639 * on the results stack. 640 * @param d The destination result. 641 * @param src The source result. 642 */ 643 void 644 bc_result_copy(BcResult* d, BcResult* src); 645 646 /** 647 * Frees a result. This is a destructor. 648 * @param result The result to free as a void pointer. 649 */ 650 void 651 bc_result_free(void* result); 652 653 /** 654 * Expands an array to @a len. This can happen because in bc, you do not have to 655 * explicitly initialize elements of an array. If you access an element that is 656 * not initialized, the array is expanded to fit it, and all missing elements 657 * are initialized to 0 if they are numbers, or arrays with one element of 0. 658 * This function does that expansion. 659 * @param a The array to expand. 660 * @param len The length to expand to. 661 */ 662 void 663 bc_array_expand(BcVec* a, size_t len); 664 665 #if BC_ENABLED 666 667 /** 668 * Returns non-zero if the bytecode instruction i is an assignment instruction. 669 * @param i The instruction to test. 670 * @return Non-zero if i is an assignment instruction, zero otherwise. 671 */ 672 #define BC_INST_IS_ASSIGN(i) \ 673 ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) 674 675 /** 676 * Returns true if the bytecode instruction @a i requires the value to be 677 * returned for use. 678 * @param i The instruction to test. 679 * @return True if @a i requires the value to be returned for use, false 680 * otherwise. 681 */ 682 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) 683 684 #else // BC_ENABLED 685 686 /** 687 * Returns non-zero if the bytecode instruction i is an assignment instruction. 688 * @param i The instruction to test. 689 * @return Non-zero if i is an assignment instruction, zero otherwise. 690 */ 691 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) 692 693 /** 694 * Returns true if the bytecode instruction @a i requires the value to be 695 * returned for use. 696 * @param i The instruction to test. 697 * @return True if @a i requires the value to be returned for use, false 698 * otherwise. 699 */ 700 #define BC_INST_USE_VAL(i) (false) 701 702 #endif // BC_ENABLED 703 704 #if BC_DEBUG_CODE 705 /// Reference to string names for all of the instructions. For debugging. 706 extern const char* bc_inst_names[]; 707 #endif // BC_DEBUG_CODE 708 709 /// References to the names of the main and read functions. 710 extern const char bc_func_main[]; 711 extern const char bc_func_read[]; 712 713 #endif // BC_LANG_H 714