1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for program data. 33 * 34 */ 35 36 #ifndef BC_LANG_H 37 #define BC_LANG_H 38 39 #include <stdbool.h> 40 41 #if BC_C11 42 #include <assert.h> 43 #endif // BC_C11 44 45 #include <status.h> 46 #include <vector.h> 47 #include <num.h> 48 49 /// The instructions for bytecode. 
typedef enum BcInst
{
	// NOTE: Enumerators are conditionally compiled (BC_ENABLED,
	// DC_ENABLED, BC_ENABLE_EXTRA_MATH), so the numeric value of any given
	// instruction depends on the build configuration. The declaration order
	// is also significant: BC_INST_USE_VAL() (defined later in this header)
	// is an ordering comparison against BC_INST_ASSIGN, and the *_NO_VAL
	// assignment variants are declared after BC_INST_ASSIGN.

#if BC_ENABLED

	/// Postfix increment and decrement. Prefix are translated into
	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
	BC_INST_INC = 0,
	BC_INST_DEC,
#endif // BC_ENABLED

	/// Unary negation. When BC_ENABLED is false, this is the first
	/// enumerator and so implicitly has the value 0.
	BC_INST_NEG,

	/// Boolean not.
	BC_INST_BOOL_NOT,
#if BC_ENABLE_EXTRA_MATH
	/// Truncation operator.
	BC_INST_TRUNC,
#endif // BC_ENABLE_EXTRA_MATH

	/// These should be self-explanatory.
	BC_INST_POWER,
	BC_INST_MULTIPLY,
	BC_INST_DIVIDE,
	BC_INST_MODULUS,
	BC_INST_PLUS,
	BC_INST_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places operator.
	BC_INST_PLACES,

	/// Shift operators.
	BC_INST_LSHIFT,
	BC_INST_RSHIFT,
#endif // BC_ENABLE_EXTRA_MATH

	/// Comparison operators.
	BC_INST_REL_EQ,
	BC_INST_REL_LE,
	BC_INST_REL_GE,
	BC_INST_REL_NE,
	BC_INST_REL_LT,
	BC_INST_REL_GT,

	/// Boolean or and and.
	BC_INST_BOOL_OR,
	BC_INST_BOOL_AND,

#if BC_ENABLED
	/// Same as the normal operators, but assignment. So ^=, *=, /=, etc.
	BC_INST_ASSIGN_POWER,
	BC_INST_ASSIGN_MULTIPLY,
	BC_INST_ASSIGN_DIVIDE,
	BC_INST_ASSIGN_MODULUS,
	BC_INST_ASSIGN_PLUS,
	BC_INST_ASSIGN_MINUS,
#if BC_ENABLE_EXTRA_MATH
	/// Places and shift assignment operators.
	BC_INST_ASSIGN_PLACES,
	BC_INST_ASSIGN_LSHIFT,
	BC_INST_ASSIGN_RSHIFT,
#endif // BC_ENABLE_EXTRA_MATH

	/// Normal assignment. This is the last instruction for which
	/// BC_INST_USE_VAL() evaluates to true.
	BC_INST_ASSIGN,

	/// bc and dc detect when the value from an assignment is not necessary.
	/// For example, a plain assignment statement means the value is never used.
	/// In those cases, we can get lots of performance back by not even creating
	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
	/// a pop from the results stack, and a free. Definitely worth it to detect.
	BC_INST_ASSIGN_POWER_NO_VAL,
	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
	BC_INST_ASSIGN_DIVIDE_NO_VAL,
	BC_INST_ASSIGN_MODULUS_NO_VAL,
	BC_INST_ASSIGN_PLUS_NO_VAL,
	BC_INST_ASSIGN_MINUS_NO_VAL,
#if BC_ENABLE_EXTRA_MATH
	/// Same as above.
	BC_INST_ASSIGN_PLACES_NO_VAL,
	BC_INST_ASSIGN_LSHIFT_NO_VAL,
	BC_INST_ASSIGN_RSHIFT_NO_VAL,
#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

	/// Normal assignment that pushes no value on the stack. Unlike
	/// BC_INST_ASSIGN, this instruction exists in every build configuration.
	BC_INST_ASSIGN_NO_VAL,

	/// Push a constant onto the results stack.
	BC_INST_NUM,

	/// Push a variable onto the results stack.
	BC_INST_VAR,

	/// Push an array element onto the results stack.
	BC_INST_ARRAY_ELEM,

	/// Push an array onto the results stack. This is different from pushing an
	/// array *element* onto the results stack; it pushes a reference to the
	/// whole array. This is needed in bc for function arguments that are
	/// arrays. It is also needed for returning the length of an array.
	BC_INST_ARRAY,

	/// Push a zero or a one onto the stack. These are special cased because it
	/// does help performance, particularly for one since inc/dec operators
	/// use it.
	BC_INST_ZERO,
	BC_INST_ONE,

#if BC_ENABLED
	/// Push the last printed value onto the stack.
	BC_INST_LAST,
#endif // BC_ENABLED

	/// Push the value of any of the globals onto the stack.
	BC_INST_IBASE,
	BC_INST_OBASE,
	BC_INST_SCALE,

#if BC_ENABLE_EXTRA_MATH
	/// Push the value of the seed global onto the stack.
	BC_INST_SEED,
#endif // BC_ENABLE_EXTRA_MATH

	/// These are builtin functions.
	BC_INST_LENGTH,
	BC_INST_SCALE_FUNC,
	BC_INST_SQRT,
	BC_INST_ABS,

#if BC_ENABLE_EXTRA_MATH
	/// Another builtin function.
	BC_INST_IRAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// Asciify.
	BC_INST_ASCIIFY,

	/// Another builtin function.
	BC_INST_READ,

#if BC_ENABLE_EXTRA_MATH
	/// Another builtin function.
	BC_INST_RAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// Return the max for the various globals.
	BC_INST_MAXIBASE,
	BC_INST_MAXOBASE,
	BC_INST_MAXSCALE,
#if BC_ENABLE_EXTRA_MATH
	/// Return the max value returned by rand().
	BC_INST_MAXRAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// bc line_length() builtin function.
	BC_INST_LINE_LENGTH,

#if BC_ENABLED

	/// bc global_stacks() builtin function.
	BC_INST_GLOBAL_STACKS,

#endif // BC_ENABLED

	/// bc leading_zero() builtin function.
	BC_INST_LEADING_ZERO,

	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
	/// dc uses this instruction to print, but not pop. That's valid in dc.
	/// However, in bc, it is *never* valid to print without popping. In bc,
	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
	/// because of a print statement or whether it should be printed raw. The
	/// reason for this is because a print statement handles escaped characters.
	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
	/// BC_INST_PRINT_STR is for printing a string by itself.
	///
	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
	///
	/// Oh, and BC_INST_STR pushes a string onto the results stack.
	BC_INST_PRINT,
	BC_INST_PRINT_POP,
	BC_INST_STR,
#if BC_ENABLED
	BC_INST_PRINT_STR,

	/// Jumps unconditionally.
	BC_INST_JUMP,

	/// Jumps if the top of the results stack is zero (condition failed). It
	/// turns out that we only want to jump when conditions fail to "skip" code.
	BC_INST_JUMP_ZERO,

	/// Call a function.
	BC_INST_CALL,

	/// Return the top of the stack to the caller.
	BC_INST_RET,

	/// Return 0 to the caller.
	BC_INST_RET0,

	/// Special return instruction for void functions.
	BC_INST_RET_VOID,

	/// Special halt instruction.
	BC_INST_HALT,
#endif // BC_ENABLED

	/// Pop an item off of the results stack.
	BC_INST_POP,

	/// Swaps the top two items on the results stack.
	BC_INST_SWAP,

	/// Modular exponentiation.
	BC_INST_MODEXP,

	/// Do divide and modulus at the same time.
	BC_INST_DIVMOD,

	/// Turns a number into a string and prints it.
	BC_INST_PRINT_STREAM,

#if DC_ENABLED

	/// dc's return; it pops an executing string off of the stack.
	BC_INST_POP_EXEC,

	/// Unconditionally execute a string.
	BC_INST_EXECUTE,

	/// Conditionally execute a string.
	BC_INST_EXEC_COND,

	/// Prints each item on the results stack, separated by newlines.
	BC_INST_PRINT_STACK,

	/// Pops everything off of the results stack.
	BC_INST_CLEAR_STACK,

	/// Pushes the current length of a register stack onto the results stack.
	BC_INST_REG_STACK_LEN,

	/// Pushes the current length of the results stack onto the results stack.
	BC_INST_STACK_LEN,

	/// Pushes a copy of the item on the top of the results stack onto the
	/// results stack.
	BC_INST_DUPLICATE,

	/// Copies the value in a register and pushes the copy onto the results
	/// stack.
	BC_INST_LOAD,

	/// Pops an item off of a register stack and pushes it onto the results
	/// stack.
	BC_INST_PUSH_VAR,

	/// Pops an item off of the results stack and pushes it onto a register's
	/// stack.
	BC_INST_PUSH_TO_VAR,

	/// Quit.
	BC_INST_QUIT,

	/// Quit executing some number of strings.
	BC_INST_NQUIT,

	/// Push the depth of the execution stack onto the stack.
	BC_INST_EXEC_STACK_LEN,

#endif // DC_ENABLED

	/// Invalid instruction. This is always the last enumerator, so its value
	/// is also an upper bound on every valid instruction value.
	BC_INST_INVALID,

} BcInst;

#if BC_C11
// Because BC_INST_INVALID is the last enumerator, this checks at compile time
// that every instruction value fits into an unsigned char.
// NOTE(review): UCHAR_MAX is defined by <limits.h>, which this header does not
// include directly; presumably it arrives through status.h, vector.h, or
// num.h -- confirm.
_Static_assert(BC_INST_INVALID <= UCHAR_MAX,
               "Too many instructions to fit into an unsigned char");
#endif // BC_C11

/// Used by maps to identify where items are in the array.
typedef struct BcId
{
	/// The name of the item.
	char* name;

	/// The index into the array where the item is.
	size_t idx;

} BcId;

/// The location of a var, array, or array element.
typedef struct BcLoc
{
	/// The index of the var or array.
	size_t loc;

	/// The index of the array element. Only used for array elements.
	size_t idx;

} BcLoc;

/// An entry for a constant.
typedef struct BcConst
{
	/// The original string as parsed from the source code.
	char* val;

	/// The last base that the constant was parsed in.
	BcBigDig base;

	/// The parsed constant.
	BcNum num;

} BcConst;

/// A function. This is also used in dc, not just bc. The reason is that strings
/// are executed in dc, and they are converted to functions in order to be
/// executed.
///
/// The labels, autos, nparams, and voidfn members only exist when BC_ENABLED
/// is true, so the layout of this struct depends on the build configuration.
typedef struct BcFunc
{
	/// The bytecode instructions.
	BcVec code;

#if BC_ENABLED

	/// The labels. This is a vector of indices. The index is the index into
	/// the bytecode vector where the label is.
	BcVec labels;

	/// The autos for the function. The first items are the parameters, and the
	/// arguments to the parameters must match the types in this vector.
	BcVec autos;

	/// The number of parameters the function takes.
	size_t nparams;

#endif // BC_ENABLED

	/// The strings encountered in the function.
	BcVec strs;

	/// The constants encountered in the function.
	BcVec consts;

	/// The function's name.
	const char* name;

#if BC_ENABLED
	/// True if the function is a void function.
	bool voidfn;
#endif // BC_ENABLED

} BcFunc;

/// Types of results that can be pushed onto the results stack.
typedef enum BcResultType
{
	/// Result is a variable.
	BC_RESULT_VAR,

	/// Result is an array element.
	BC_RESULT_ARRAY_ELEM,

	/// Result is an array. This is only allowed for function arguments or
	/// returning the length of the array.
	BC_RESULT_ARRAY,

	/// Result is a string.
	BC_RESULT_STR,

	/// Result is a temporary. This is used for the result of almost all
	/// expressions.
	BC_RESULT_TEMP,

	/// Special casing the two below gave performance improvements.

	/// Result is a 0.
	BC_RESULT_ZERO,

	/// Result is a 1. Useful for inc/dec operators.
	BC_RESULT_ONE,

#if BC_ENABLED

	/// Result is the special "last" variable.
	BC_RESULT_LAST,

	/// Result is the return value of a void function.
	BC_RESULT_VOID,
#endif // BC_ENABLED

	/// Result is the value of ibase.
	BC_RESULT_IBASE,

	/// Result is the value of obase.
	BC_RESULT_OBASE,

	/// Result is the value of scale.
	BC_RESULT_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// Result is the value of seed.
	BC_RESULT_SEED,

#endif // BC_ENABLE_EXTRA_MATH

} BcResultType;

/// A union to store data for various result types.
typedef union BcResultData
{
	/// A number. Strings are stored here too; they are numbers with
	/// cap == 0 && num == NULL. The string's index into the strings vector is
	/// stored in the scale field. But this is only used for strings stored in
	/// variables.
	BcNum n;

	/// A vector.
	BcVec v;

	/// A variable, array, or array element reference. This could also be a
	/// string if a string is not stored in a variable (dc only).
	BcLoc loc;

} BcResultData;

/// A tagged union for results.
typedef struct BcResult
{
	/// The tag. The type of the result.
	BcResultType t;

	/// The data. The data for the result.
	BcResultData d;

} BcResult;

/// An instruction pointer. This is how bc knows where in the bytecode vector,
/// and which function, the current execution is.
typedef struct BcInstPtr
{
	/// The index of the currently executing function in the fns vector.
	size_t func;

	/// The index into the bytecode vector of the *next* instruction.
	size_t idx;

	/// The length of the results vector when this function started executing.
	/// This is mostly used for bc where functions should not affect the results
	/// of their callers.
	size_t len;

} BcInstPtr;

/// Types of identifiers.
typedef enum BcType
{
	/// Variable.
	BC_TYPE_VAR,

	/// Array.
	BC_TYPE_ARRAY,

#if BC_ENABLED

	/// Array reference. Only exists when BC_ENABLED is true.
	BC_TYPE_REF,

#endif // BC_ENABLED

} BcType;

#if BC_ENABLED
/// An auto variable in bc.
typedef struct BcAuto
{
	/// The index of the variable in the vars or arrs vectors.
	size_t idx;

	/// The type of the variable.
	BcType type;

} BcAuto;
#endif // BC_ENABLED

/// Forward declaration. Only pointers to struct BcProgram are used in this
/// header, so the full definition is not needed here.
struct BcProgram;

/**
 * Initializes a function.
 * @param f     The function to initialize.
 * @param name  The name of the function. The string is assumed to be owned by
 *              some other entity.
 */
void
bc_func_init(BcFunc* f, const char* name);

/**
 * Inserts an auto into the function.
 * @param f     The function to insert into.
 * @param p     The program. This is to search for the variable or array name.
 * @param name  The name of the auto to insert.
 * @param type  The type of the auto.
 * @param line  The line in the source code where the insert happened. This is
 *              solely for error reporting.
 */
void
bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type,
               size_t line);

/**
 * Resets a function in preparation for it to be reused. This can happen in bc
 * because it is a dynamic language and functions can be redefined.
 * @param f  The function to reset.
 */
void
bc_func_reset(BcFunc* f);

#ifndef NDEBUG
/**
 * Frees a function. This is a destructor. This is only used in debug builds
 * because all functions are freed at exit. We free them in debug builds to
 * check for memory leaks.
 * @param func  The function to free as a void pointer.
 */
void
bc_func_free(void* func);
#endif // NDEBUG

/**
 * Initializes an array, which is the array type in bc and dc source code. Since
 * variables and arrays are both arrays (see the development manual,
 * manuals/development.md#execution, for more information), the @a nums
 * parameter tells bc whether to initialize an array of numbers or an array of
 * arrays of numbers. If the latter, it does a recursive call with nums set to
 * true.
 * @param a     The array to initialize.
 * @param nums  True if the array should be for numbers, false if it should be
 *              for vectors.
 */
void
bc_array_init(BcVec* a, bool nums);

/**
 * Copies an array to another array. This is used to pass arrays to functions
 * that do not take references to arrays. The arrays are passed entirely by
 * value, which means that they need to be copied.
 * @param d  The destination array.
 * @param s  The source array.
 */
void
bc_array_copy(BcVec* d, const BcVec* s);

/**
 * Frees a string stored in a function. This is a destructor.
 * @param string  The string to free as a void pointer.
 */
void
bc_string_free(void* string);

/**
 * Frees a constant stored in a function. This is a destructor.
 * @param constant  The constant to free as a void pointer.
 */
void
bc_const_free(void* constant);

/**
 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
 * clearing the BcNum in the union. This is to ensure that bc does not use
 * uninitialized data.
 * @param r  The result to clear.
 */
void
bc_result_clear(BcResult* r);

/**
 * Copies a result into another. This is done for things like duplicating the
 * top of the results stack or copying the result of an assignment to put back
 * on the results stack.
 * @param d    The destination result.
 * @param src  The source result.
 */
void
bc_result_copy(BcResult* d, BcResult* src);

/**
 * Frees a result. This is a destructor.
 * @param result  The result to free as a void pointer.
 */
void
bc_result_free(void* result);

/**
 * Expands an array to @a len. This can happen because in bc, you do not have to
 * explicitly initialize elements of an array. If you access an element that is
 * not initialized, the array is expanded to fit it, and all missing elements
 * are initialized to 0 if they are numbers, or arrays with one element of 0.
 * This function does that expansion.
 * @param a    The array to expand.
 * @param len  The length to expand to.
 */
void
bc_array_expand(BcVec* a, size_t len);

/**
 * Compare two BcId's and return the result. Since they are just comparing the
 * names in the BcId, I return the result from strcmp() exactly. This is used by
 * maps in their binary search.
 * @param e1  The first id.
 * @param e2  The second id.
 * @return    The result of strcmp() on the BcId's names.
 */
int
bc_id_cmp(const BcId* e1, const BcId* e2);

#if BC_ENABLED

/**
 * Returns non-zero if the bytecode instruction i is an assignment instruction.
 * Only plain assignment (BC_INST_ASSIGN and BC_INST_ASSIGN_NO_VAL) is tested;
 * the compound assignment instructions are not matched.
 * Note that the argument is expanded twice, so it must not have side effects.
 * @param i  The instruction to test.
 * @return   Non-zero if i is an assignment instruction, zero otherwise.
 */
#define BC_INST_IS_ASSIGN(i) \
	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)

/**
 * Returns true if the bytecode instruction @a i requires the value to be
 * returned for use. This is an ordering comparison, so it depends on the
 * declaration order of BcInst: the *_NO_VAL instructions are declared after
 * BC_INST_ASSIGN and therefore yield false.
 * NOTE(review): every instruction declared before BC_INST_ASSIGN also yields
 * true; presumably this is only called with assignment instructions -- confirm
 * against callers.
 * @param i  The instruction to test.
 * @return   True if @a i requires the value to be returned for use, false
 *           otherwise.
 */
#define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)

#else // BC_ENABLED

/**
 * Returns non-zero if the bytecode instruction i is an assignment instruction.
 * BC_INST_ASSIGN is only declared when BC_ENABLED is true, so in this
 * configuration only BC_INST_ASSIGN_NO_VAL is tested.
 * @param i  The instruction to test.
 * @return   Non-zero if i is an assignment instruction, zero otherwise.
 */
#define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)

/**
 * Returns true if the bytecode instruction @a i requires the value to be
 * returned for use. In this configuration the result is always false, and the
 * argument is not evaluated.
 * @param i  The instruction to test.
 * @return   True if @a i requires the value to be returned for use, false
 *           otherwise.
 */
#define BC_INST_USE_VAL(i) (false)

#endif // BC_ENABLED

#if BC_DEBUG_CODE
/// Reference to string names for all of the instructions. For debugging.
extern const char* bc_inst_names[];
#endif // BC_DEBUG_CODE

/// References to the names of the main and read functions.
extern const char bc_func_main[];
extern const char bc_func_read[];

#endif // BC_LANG_H