1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2024 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for program data. 33 * 34 */ 35 36 #ifndef BC_LANG_H 37 #define BC_LANG_H 38 39 #include <stdbool.h> 40 41 // These have to come first to silence a warning on BC_C11 below. 42 #include <status.h> 43 #include <vector.h> 44 #include <num.h> 45 46 #if BC_C11 47 #include <assert.h> 48 #endif // BC_C11 49 50 /// The instructions for bytecode. 51 typedef enum BcInst 52 { 53 #if BC_ENABLED 54 /// Postfix increment and decrement. Prefix are translated into 55 /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. 56 BC_INST_INC = 0, 57 BC_INST_DEC, 58 #endif // BC_ENABLED 59 60 /// Unary negation. 61 BC_INST_NEG, 62 63 /// Boolean not. 64 BC_INST_BOOL_NOT, 65 66 #if BC_ENABLE_EXTRA_MATH 67 /// Truncation operator. 68 BC_INST_TRUNC, 69 #endif // BC_ENABLE_EXTRA_MATH 70 71 /// These should be self-explanatory. 72 BC_INST_POWER, 73 BC_INST_MULTIPLY, 74 BC_INST_DIVIDE, 75 BC_INST_MODULUS, 76 BC_INST_PLUS, 77 BC_INST_MINUS, 78 79 #if BC_ENABLE_EXTRA_MATH 80 /// Places operator. 81 BC_INST_PLACES, 82 83 /// Shift operators. 84 BC_INST_LSHIFT, 85 BC_INST_RSHIFT, 86 #endif // BC_ENABLE_EXTRA_MATH 87 88 /// Comparison operators. 89 BC_INST_REL_EQ, 90 BC_INST_REL_LE, 91 BC_INST_REL_GE, 92 BC_INST_REL_NE, 93 BC_INST_REL_LT, 94 BC_INST_REL_GT, 95 96 /// Boolean or and and. 97 BC_INST_BOOL_OR, 98 BC_INST_BOOL_AND, 99 100 #if BC_ENABLED 101 /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. 102 BC_INST_ASSIGN_POWER, 103 BC_INST_ASSIGN_MULTIPLY, 104 BC_INST_ASSIGN_DIVIDE, 105 BC_INST_ASSIGN_MODULUS, 106 BC_INST_ASSIGN_PLUS, 107 BC_INST_ASSIGN_MINUS, 108 #if BC_ENABLE_EXTRA_MATH 109 /// Places and shift assignment operators. 110 BC_INST_ASSIGN_PLACES, 111 BC_INST_ASSIGN_LSHIFT, 112 BC_INST_ASSIGN_RSHIFT, 113 #endif // BC_ENABLE_EXTRA_MATH 114 115 /// Normal assignment. 116 BC_INST_ASSIGN, 117 118 /// bc and dc detect when the value from an assignment is not necessary. 119 /// For example, a plain assignment statement means the value is never used. 120 /// In those cases, we can get lots of performance back by not even creating 121 /// a copy at all. In fact, it saves a copy, a push onto the results stack, 122 /// a pop from the results stack, and a free. Definitely worth it to detect. 123 BC_INST_ASSIGN_POWER_NO_VAL, 124 BC_INST_ASSIGN_MULTIPLY_NO_VAL, 125 BC_INST_ASSIGN_DIVIDE_NO_VAL, 126 BC_INST_ASSIGN_MODULUS_NO_VAL, 127 BC_INST_ASSIGN_PLUS_NO_VAL, 128 BC_INST_ASSIGN_MINUS_NO_VAL, 129 #if BC_ENABLE_EXTRA_MATH 130 /// Same as above. 131 BC_INST_ASSIGN_PLACES_NO_VAL, 132 BC_INST_ASSIGN_LSHIFT_NO_VAL, 133 BC_INST_ASSIGN_RSHIFT_NO_VAL, 134 #endif // BC_ENABLE_EXTRA_MATH 135 #endif // BC_ENABLED 136 137 /// Normal assignment that pushes no value on the stack. 138 BC_INST_ASSIGN_NO_VAL, 139 140 /// Push a constant onto the results stack. 141 BC_INST_NUM, 142 143 /// Push a variable onto the results stack. 144 BC_INST_VAR, 145 146 /// Push an array element onto the results stack. 147 BC_INST_ARRAY_ELEM, 148 149 /// Push an array onto the results stack. This is different from pushing an 150 /// array *element* onto the results stack; it pushes a reference to the 151 /// whole array. This is needed in bc for function arguments that are 152 /// arrays. It is also needed for returning the length of an array. 153 BC_INST_ARRAY, 154 155 /// Push a zero or a one onto the stack. These are special cased because it 156 /// does help performance, particularly for one since inc/dec operators 157 /// use it. 158 BC_INST_ZERO, 159 BC_INST_ONE, 160 161 #if BC_ENABLED 162 /// Push the last printed value onto the stack. 163 BC_INST_LAST, 164 #endif // BC_ENABLED 165 166 /// Push the value of any of the globals onto the stack. 167 BC_INST_IBASE, 168 BC_INST_OBASE, 169 BC_INST_SCALE, 170 171 #if BC_ENABLE_EXTRA_MATH 172 /// Push the value of the seed global onto the stack. 173 BC_INST_SEED, 174 #endif // BC_ENABLE_EXTRA_MATH 175 176 /// These are builtin functions. 177 BC_INST_LENGTH, 178 BC_INST_SCALE_FUNC, 179 BC_INST_SQRT, 180 BC_INST_ABS, 181 BC_INST_IS_NUMBER, 182 BC_INST_IS_STRING, 183 184 #if BC_ENABLE_EXTRA_MATH 185 /// Another builtin function. 186 BC_INST_IRAND, 187 #endif // BC_ENABLE_EXTRA_MATH 188 189 /// Asciify. 190 BC_INST_ASCIIFY, 191 192 /// Another builtin function. 193 BC_INST_READ, 194 195 #if BC_ENABLE_EXTRA_MATH 196 /// Another builtin function. 197 BC_INST_RAND, 198 #endif // BC_ENABLE_EXTRA_MATH 199 200 /// Return the max for the various globals. 201 BC_INST_MAXIBASE, 202 BC_INST_MAXOBASE, 203 BC_INST_MAXSCALE, 204 #if BC_ENABLE_EXTRA_MATH 205 /// Return the max value returned by rand(). 206 BC_INST_MAXRAND, 207 #endif // BC_ENABLE_EXTRA_MATH 208 209 /// bc line_length() builtin function. 210 BC_INST_LINE_LENGTH, 211 212 #if BC_ENABLED 213 214 /// bc global_stacks() builtin function. 215 BC_INST_GLOBAL_STACKS, 216 217 #endif // BC_ENABLED 218 219 /// bc leading_zero() builtin function. 220 BC_INST_LEADING_ZERO, 221 222 /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. 223 /// dc uses this instruction to print, but not pop. That's valid in dc. 224 /// However, in bc, it is *never* valid to print without popping. In bc, 225 /// BC_INST_PRINT_POP is used to indicate when a string should be printed 226 /// because of a print statement or whether it should be printed raw. The 227 /// reason for this is because a print statement handles escaped characters. 228 /// So BC_INST_PRINT_POP is for printing a string from a print statement, 229 /// BC_INST_PRINT_STR is for printing a string by itself. 230 /// 231 /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. 232 /// 233 /// Oh, and BC_INST_STR pushes a string onto the results stack. 234 BC_INST_PRINT, 235 BC_INST_PRINT_POP, 236 BC_INST_STR, 237 #if BC_ENABLED 238 BC_INST_PRINT_STR, 239 240 /// Jumps unconditionally. 241 BC_INST_JUMP, 242 243 /// Jumps if the top of the results stack is zero (condition failed). It 244 /// turns out that we only want to jump when conditions fail to "skip" code. 245 BC_INST_JUMP_ZERO, 246 247 /// Call a function. 248 BC_INST_CALL, 249 250 /// Return the top of the stack to the caller. 251 BC_INST_RET, 252 253 /// Return 0 to the caller. 254 BC_INST_RET0, 255 256 /// Special return instruction for void functions. 257 BC_INST_RET_VOID, 258 259 /// Special halt instruction. 260 BC_INST_HALT, 261 #endif // BC_ENABLED 262 263 /// Pop an item off of the results stack. 264 BC_INST_POP, 265 266 /// Swaps the top two items on the results stack. 267 BC_INST_SWAP, 268 269 /// Modular exponentiation. 270 BC_INST_MODEXP, 271 272 /// Do divide and modulus at the same time. 273 BC_INST_DIVMOD, 274 275 /// Turns a number into a string and prints it. 276 BC_INST_PRINT_STREAM, 277 278 #if DC_ENABLED 279 280 /// dc extended registers command. 281 BC_INST_EXTENDED_REGISTERS, 282 283 /// dc's return; it pops an executing string off of the stack. 284 BC_INST_POP_EXEC, 285 286 /// Unconditionally execute a string. 287 BC_INST_EXECUTE, 288 289 /// Conditionally execute a string. 290 BC_INST_EXEC_COND, 291 292 /// Prints each item on the results stack, separated by newlines. 293 BC_INST_PRINT_STACK, 294 295 /// Pops everything off of the results stack. 296 BC_INST_CLEAR_STACK, 297 298 /// Pushes the current length of a register stack onto the results stack. 299 BC_INST_REG_STACK_LEN, 300 301 /// Pushes the current length of the results stack onto the results stack. 302 BC_INST_STACK_LEN, 303 304 /// Pushes a copy of the item on the top of the results stack onto the 305 /// results stack. 306 BC_INST_DUPLICATE, 307 308 /// Copies the value in a register and pushes the copy onto the results 309 /// stack. 310 BC_INST_LOAD, 311 312 /// Pops an item off of a register stack and pushes it onto the results 313 /// stack. 314 BC_INST_PUSH_VAR, 315 316 /// Pops an item off of the results stack and pushes it onto a register's 317 /// stack. 318 BC_INST_PUSH_TO_VAR, 319 320 /// Quit. 321 BC_INST_QUIT, 322 323 /// Quit executing some number of strings. 324 BC_INST_NQUIT, 325 326 /// Push the depth of the execution stack onto the stack. 327 BC_INST_EXEC_STACK_LEN, 328 329 #endif // DC_ENABLED 330 331 /// Invalid instruction. 332 BC_INST_INVALID, 333 334 } BcInst; 335 336 #if BC_C11 337 _Static_assert(BC_INST_INVALID <= UCHAR_MAX, 338 "Too many instructions to fit into an unsigned char"); 339 #endif // BC_C11 340 341 /// Used by maps to identify where items are in the array. 342 typedef struct BcId 343 { 344 /// The name of the item. 345 char* name; 346 347 /// The index into the array where the item is. 348 size_t idx; 349 350 } BcId; 351 352 /// The location of a var, array, or array element. 353 typedef struct BcLoc 354 { 355 /// The index of the var or array. 356 size_t loc; 357 358 /// The index of the array or variable in the array stack. This is to 359 /// prevent a bug with getting the wrong array element or variable after a 360 /// function call. See the tests/bc/scripts/array.bc test for the array 361 /// case; the variable case is in various variable tests. 362 size_t stack_idx; 363 364 /// The index of the array element. Only used for array elements. 365 size_t idx; 366 367 } BcLoc; 368 369 /// An entry for a constant. 370 typedef struct BcConst 371 { 372 /// The original string as parsed from the source code. 373 char* val; 374 375 /// The last base that the constant was parsed in. 376 BcBigDig base; 377 378 /// The parsed constant. 379 BcNum num; 380 381 } BcConst; 382 383 /// A function. This is also used in dc, not just bc. The reason is that strings 384 /// are executed in dc, and they are converted to functions in order to be 385 /// executed. 386 typedef struct BcFunc 387 { 388 /// The bytecode instructions. 389 BcVec code; 390 391 #if BC_ENABLED 392 393 /// The labels. This is a vector of indices. The index is the index into 394 /// the bytecode vector where the label is. 395 BcVec labels; 396 397 /// The autos for the function. The first items are the parameters, and the 398 /// arguments to the parameters must match the types in this vector. 399 BcVec autos; 400 401 /// The number of parameters the function takes. 402 size_t nparams; 403 404 #endif // BC_ENABLED 405 406 /// The function's name. 407 const char* name; 408 409 #if BC_ENABLED 410 /// True if the function is a void function. 411 bool voidfn; 412 #endif // BC_ENABLED 413 414 } BcFunc; 415 416 /// Types of results that can be pushed onto the results stack. 417 typedef enum BcResultType 418 { 419 /// Result is a variable. 420 BC_RESULT_VAR, 421 422 /// Result is an array element. 423 BC_RESULT_ARRAY_ELEM, 424 425 /// Result is an array. This is only allowed for function arguments or 426 /// returning the length of the array. 427 BC_RESULT_ARRAY, 428 429 /// Result is a string. 430 BC_RESULT_STR, 431 432 /// Result is a temporary. This is used for the result of almost all 433 /// expressions. 434 BC_RESULT_TEMP, 435 436 /// Special casing the two below gave performance improvements. 437 438 /// Result is a 0. 439 BC_RESULT_ZERO, 440 441 /// Result is a 1. Useful for inc/dec operators. 442 BC_RESULT_ONE, 443 444 #if BC_ENABLED 445 446 /// Result is the special "last" variable. 447 BC_RESULT_LAST, 448 449 /// Result is the return value of a void function. 450 BC_RESULT_VOID, 451 #endif // BC_ENABLED 452 453 /// Result is the value of ibase. 454 BC_RESULT_IBASE, 455 456 /// Result is the value of obase. 457 BC_RESULT_OBASE, 458 459 /// Result is the value of scale. 460 BC_RESULT_SCALE, 461 462 #if BC_ENABLE_EXTRA_MATH 463 464 /// Result is the value of seed. 465 BC_RESULT_SEED, 466 467 #endif // BC_ENABLE_EXTRA_MATH 468 469 } BcResultType; 470 471 /// A union to store data for various result types. 472 typedef union BcResultData 473 { 474 /// A number. Strings are stored here too; they are numbers with 475 /// cap == 0 && num == NULL. The string's index into the strings vector is 476 /// stored in the scale field. But this is only used for strings stored in 477 /// variables. 478 BcNum n; 479 480 /// A vector. 481 BcVec v; 482 483 /// A variable, array, or array element reference. This could also be a 484 /// string if a string is not stored in a variable (dc only). 485 BcLoc loc; 486 487 } BcResultData; 488 489 /// A tagged union for results. 490 typedef struct BcResult 491 { 492 /// The tag. The type of the result. 493 BcResultType t; 494 495 /// The data. The data for the result. 496 BcResultData d; 497 498 } BcResult; 499 500 /// An instruction pointer. This is how bc knows where in the bytecode vector, 501 /// and which function, the current execution is. 502 typedef struct BcInstPtr 503 { 504 /// The index of the currently executing function in the fns vector. 505 size_t func; 506 507 /// The index into the bytecode vector of the *next* instruction. 508 size_t idx; 509 510 /// The length of the results vector when this function started executing. 511 /// This is mostly used for bc where functions should not affect the results 512 /// of their callers. 513 size_t len; 514 515 } BcInstPtr; 516 517 /// Types of identifiers. 518 typedef enum BcType 519 { 520 /// Variable. 521 BC_TYPE_VAR, 522 523 /// Array. 524 BC_TYPE_ARRAY, 525 526 #if BC_ENABLED 527 528 /// Array reference. 529 BC_TYPE_REF, 530 531 #endif // BC_ENABLED 532 533 } BcType; 534 535 #if BC_ENABLED 536 /// An auto variable in bc. 537 typedef struct BcAuto 538 { 539 /// The index of the variable in the vars or arrs vectors. 540 size_t idx; 541 542 /// The type of the variable. 543 BcType type; 544 545 } BcAuto; 546 #endif // BC_ENABLED 547 548 /// Forward declaration. 549 struct BcProgram; 550 551 /** 552 * Initializes a function. 553 * @param f The function to initialize. 554 * @param name The name of the function. The string is assumed to be owned by 555 * some other entity. 556 */ 557 void 558 bc_func_init(BcFunc* f, const char* name); 559 560 /** 561 * Inserts an auto into the function. 562 * @param f The function to insert into. 563 * @param p The program. This is to search for the variable or array name. 564 * @param name The name of the auto to insert. 565 * @param type The type of the auto. 566 * @param line The line in the source code where the insert happened. This is 567 * solely for error reporting. 568 */ 569 void 570 bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type, 571 size_t line); 572 573 /** 574 * Resets a function in preparation for it to be reused. This can happen in bc 575 * because it is a dynamic language and functions can be redefined. 576 * @param f The functio to reset. 577 */ 578 void 579 bc_func_reset(BcFunc* f); 580 581 #if BC_DEBUG 582 /** 583 * Frees a function. This is a destructor. This is only used in debug builds 584 * because all functions are freed at exit. We free them in debug builds to 585 * check for memory leaks. 586 * @param func The function to free as a void pointer. 587 */ 588 void 589 bc_func_free(void* func); 590 #endif // BC_DEBUG 591 592 /** 593 * Initializes an array, which is the array type in bc and dc source code. Since 594 * variables and arrays are both arrays (see the development manual, 595 * manuals/development.md#execution, for more information), the @a nums 596 * parameter tells bc whether to initialize an array of numbers or an array of 597 * arrays of numbers. If the latter, it does a recursive call with nums set to 598 * true. 599 * @param a The array to initialize. 600 * @param nums True if the array should be for numbers, false if it should be 601 * for vectors. 602 */ 603 void 604 bc_array_init(BcVec* a, bool nums); 605 606 /** 607 * Copies an array to another array. This is used to do pass arrays to functions 608 * that do not take references to arrays. The arrays are passed entirely by 609 * value, which means that they need to be copied. 610 * @param d The destination array. 611 * @param s The source array. 612 */ 613 void 614 bc_array_copy(BcVec* d, const BcVec* s); 615 616 /** 617 * Frees a string stored in a function. This is a destructor. 618 * @param string The string to free as a void pointer. 619 */ 620 void 621 bc_string_free(void* string); 622 623 /** 624 * Frees a constant stored in a function. This is a destructor. 625 * @param constant The constant to free as a void pointer. 626 */ 627 void 628 bc_const_free(void* constant); 629 630 /** 631 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by 632 * clearing the BcNum in the union. This is to ensure that bc does not use 633 * uninitialized data. 634 * @param r The result to clear. 635 */ 636 void 637 bc_result_clear(BcResult* r); 638 639 /** 640 * Copies a result into another. This is done for things like duplicating the 641 * top of the results stack or copying the result of an assignment to put back 642 * on the results stack. 643 * @param d The destination result. 644 * @param src The source result. 645 */ 646 void 647 bc_result_copy(BcResult* d, BcResult* src); 648 649 /** 650 * Frees a result. This is a destructor. 651 * @param result The result to free as a void pointer. 652 */ 653 void 654 bc_result_free(void* result); 655 656 /** 657 * Expands an array to @a len. This can happen because in bc, you do not have to 658 * explicitly initialize elements of an array. If you access an element that is 659 * not initialized, the array is expanded to fit it, and all missing elements 660 * are initialized to 0 if they are numbers, or arrays with one element of 0. 661 * This function does that expansion. 662 * @param a The array to expand. 663 * @param len The length to expand to. 664 */ 665 void 666 bc_array_expand(BcVec* a, size_t len); 667 668 #if BC_ENABLED 669 670 /** 671 * Returns non-zero if the bytecode instruction i is an assignment instruction. 672 * @param i The instruction to test. 673 * @return Non-zero if i is an assignment instruction, zero otherwise. 674 */ 675 #define BC_INST_IS_ASSIGN(i) \ 676 ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) 677 678 /** 679 * Returns true if the bytecode instruction @a i requires the value to be 680 * returned for use. 681 * @param i The instruction to test. 682 * @return True if @a i requires the value to be returned for use, false 683 * otherwise. 684 */ 685 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) 686 687 #else // BC_ENABLED 688 689 /** 690 * Returns non-zero if the bytecode instruction i is an assignment instruction. 691 * @param i The instruction to test. 692 * @return Non-zero if i is an assignment instruction, zero otherwise. 693 */ 694 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) 695 696 /** 697 * Returns true if the bytecode instruction @a i requires the value to be 698 * returned for use. 699 * @param i The instruction to test. 700 * @return True if @a i requires the value to be returned for use, false 701 * otherwise. 702 */ 703 #define BC_INST_USE_VAL(i) (false) 704 705 #endif // BC_ENABLED 706 707 #if BC_DEBUG_CODE 708 /// Reference to string names for all of the instructions. For debugging. 709 extern const char* bc_inst_names[]; 710 #endif // BC_DEBUG_CODE 711 712 /// References to the names of the main and read functions. 713 extern const char bc_func_main[]; 714 extern const char bc_func_read[]; 715 716 #endif // BC_LANG_H 717