1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for program data. 33 * 34 */ 35 36 #ifndef BC_LANG_H 37 #define BC_LANG_H 38 39 #include <stdbool.h> 40 #if BC_C11 41 #include <assert.h> 42 #endif // BC_C11 43 44 #include <status.h> 45 #include <vector.h> 46 #include <num.h> 47 48 /// The instructions for bytecode. 49 typedef enum BcInst { 50 51 #if BC_ENABLED 52 53 /// Postfix increment and decrement. Prefix are translated into 54 /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. 55 BC_INST_INC = 0, 56 BC_INST_DEC, 57 #endif // BC_ENABLED 58 59 /// Unary negation. 60 BC_INST_NEG, 61 62 /// Boolean not. 63 BC_INST_BOOL_NOT, 64 #if BC_ENABLE_EXTRA_MATH 65 /// Truncation operator. 66 BC_INST_TRUNC, 67 #endif // BC_ENABLE_EXTRA_MATH 68 69 /// These should be self-explanatory. 70 BC_INST_POWER, 71 BC_INST_MULTIPLY, 72 BC_INST_DIVIDE, 73 BC_INST_MODULUS, 74 BC_INST_PLUS, 75 BC_INST_MINUS, 76 77 #if BC_ENABLE_EXTRA_MATH 78 79 /// Places operator. 80 BC_INST_PLACES, 81 82 /// Shift operators. 83 BC_INST_LSHIFT, 84 BC_INST_RSHIFT, 85 #endif // BC_ENABLE_EXTRA_MATH 86 87 /// Comparison operators. 88 BC_INST_REL_EQ, 89 BC_INST_REL_LE, 90 BC_INST_REL_GE, 91 BC_INST_REL_NE, 92 BC_INST_REL_LT, 93 BC_INST_REL_GT, 94 95 /// Boolean or and and. 96 BC_INST_BOOL_OR, 97 BC_INST_BOOL_AND, 98 99 #if BC_ENABLED 100 /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. 101 BC_INST_ASSIGN_POWER, 102 BC_INST_ASSIGN_MULTIPLY, 103 BC_INST_ASSIGN_DIVIDE, 104 BC_INST_ASSIGN_MODULUS, 105 BC_INST_ASSIGN_PLUS, 106 BC_INST_ASSIGN_MINUS, 107 #if BC_ENABLE_EXTRA_MATH 108 /// Places and shift assignment operators. 109 BC_INST_ASSIGN_PLACES, 110 BC_INST_ASSIGN_LSHIFT, 111 BC_INST_ASSIGN_RSHIFT, 112 #endif // BC_ENABLE_EXTRA_MATH 113 114 /// Normal assignment. 115 BC_INST_ASSIGN, 116 117 /// bc and dc detect when the value from an assignment is not necessary. 118 /// For example, a plain assignment statement means the value is never used. 119 /// In those cases, we can get lots of performance back by not even creating 120 /// a copy at all. In fact, it saves a copy, a push onto the results stack, 121 /// a pop from the results stack, and a free. Definitely worth it to detect. 122 BC_INST_ASSIGN_POWER_NO_VAL, 123 BC_INST_ASSIGN_MULTIPLY_NO_VAL, 124 BC_INST_ASSIGN_DIVIDE_NO_VAL, 125 BC_INST_ASSIGN_MODULUS_NO_VAL, 126 BC_INST_ASSIGN_PLUS_NO_VAL, 127 BC_INST_ASSIGN_MINUS_NO_VAL, 128 #if BC_ENABLE_EXTRA_MATH 129 /// Same as above. 130 BC_INST_ASSIGN_PLACES_NO_VAL, 131 BC_INST_ASSIGN_LSHIFT_NO_VAL, 132 BC_INST_ASSIGN_RSHIFT_NO_VAL, 133 #endif // BC_ENABLE_EXTRA_MATH 134 #endif // BC_ENABLED 135 136 /// Normal assignment that pushes no value on the stack. 137 BC_INST_ASSIGN_NO_VAL, 138 139 /// Push a constant onto the results stack. 140 BC_INST_NUM, 141 142 /// Push a variable onto the results stack. 143 BC_INST_VAR, 144 145 /// Push an array element onto the results stack. 146 BC_INST_ARRAY_ELEM, 147 148 /// Push an array onto the results stack. This is different from pushing an 149 /// array *element* onto the results stack; it pushes a reference to the 150 /// whole array. This is needed in bc for function arguments that are 151 /// arrays. It is also needed for returning the length of an array. 152 BC_INST_ARRAY, 153 154 /// Push a zero or a one onto the stack. These are special cased because it 155 /// does help performance, particularly for one since inc/dec operators 156 /// use it. 157 BC_INST_ZERO, 158 BC_INST_ONE, 159 160 #if BC_ENABLED 161 /// Push the last printed value onto the stack. 162 BC_INST_LAST, 163 #endif // BC_ENABLED 164 165 /// Push the value of any of the globals onto the stack. 166 BC_INST_IBASE, 167 BC_INST_OBASE, 168 BC_INST_SCALE, 169 170 #if BC_ENABLE_EXTRA_MATH 171 /// Push the value of the seed global onto the stack. 172 BC_INST_SEED, 173 #endif // BC_ENABLE_EXTRA_MATH 174 175 /// These are builtin functions. 176 BC_INST_LENGTH, 177 BC_INST_SCALE_FUNC, 178 BC_INST_SQRT, 179 BC_INST_ABS, 180 181 #if BC_ENABLE_EXTRA_MATH 182 /// Another builtin function. 183 BC_INST_IRAND, 184 #endif // BC_ENABLE_EXTRA_MATH 185 186 /// Asciify. 187 BC_INST_ASCIIFY, 188 189 /// Another builtin function. 190 BC_INST_READ, 191 192 #if BC_ENABLE_EXTRA_MATH 193 /// Another builtin function. 194 BC_INST_RAND, 195 #endif // BC_ENABLE_EXTRA_MATH 196 197 /// Return the max for the various globals. 198 BC_INST_MAXIBASE, 199 BC_INST_MAXOBASE, 200 BC_INST_MAXSCALE, 201 #if BC_ENABLE_EXTRA_MATH 202 /// Return the max value returned by rand(). 203 BC_INST_MAXRAND, 204 #endif // BC_ENABLE_EXTRA_MATH 205 206 /// bc line_length() builtin function. 207 BC_INST_LINE_LENGTH, 208 209 #if BC_ENABLED 210 211 /// bc global_stacks() builtin function. 212 BC_INST_GLOBAL_STACKS, 213 214 #endif // BC_ENABLED 215 216 /// bc leading_zero() builtin function. 217 BC_INST_LEADING_ZERO, 218 219 /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. 220 /// dc uses this instruction to print, but not pop. That's valid in dc. 221 /// However, in bc, it is *never* valid to print without popping. In bc, 222 /// BC_INST_PRINT_POP is used to indicate when a string should be printed 223 /// because of a print statement or whether it should be printed raw. The 224 /// reason for this is because a print statement handles escaped characters. 225 /// So BC_INST_PRINT_POP is for printing a string from a print statement, 226 /// BC_INST_PRINT_STR is for printing a string by itself. 227 /// 228 /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. 229 /// 230 /// Oh, and BC_INST_STR pushes a string onto the results stack. 231 BC_INST_PRINT, 232 BC_INST_PRINT_POP, 233 BC_INST_STR, 234 #if BC_ENABLED 235 BC_INST_PRINT_STR, 236 237 /// Jumps unconditionally. 238 BC_INST_JUMP, 239 240 /// Jumps if the top of the results stack is zero (condition failed). It 241 /// turns out that we only want to jump when conditions fail to "skip" code. 242 BC_INST_JUMP_ZERO, 243 244 /// Call a function. 245 BC_INST_CALL, 246 247 /// Return the top of the stack to the caller. 248 BC_INST_RET, 249 250 /// Return 0 to the caller. 251 BC_INST_RET0, 252 253 /// Special return instruction for void functions. 254 BC_INST_RET_VOID, 255 256 /// Special halt instruction. 257 BC_INST_HALT, 258 #endif // BC_ENABLED 259 260 /// Pop an item off of the results stack. 261 BC_INST_POP, 262 263 /// Swaps the top two items on the results stack. 264 BC_INST_SWAP, 265 266 /// Modular exponentiation. 267 BC_INST_MODEXP, 268 269 /// Do divide and modulus at the same time. 270 BC_INST_DIVMOD, 271 272 /// Turns a number into a string and prints it. 273 BC_INST_PRINT_STREAM, 274 275 #if DC_ENABLED 276 277 /// dc's return; it pops an executing string off of the stack. 278 BC_INST_POP_EXEC, 279 280 /// Unconditionally execute a string. 281 BC_INST_EXECUTE, 282 283 /// Conditionally execute a string. 284 BC_INST_EXEC_COND, 285 286 /// Prints each item on the results stack, separated by newlines. 287 BC_INST_PRINT_STACK, 288 289 /// Pops everything off of the results stack. 290 BC_INST_CLEAR_STACK, 291 292 /// Pushes the current length of a register stack onto the results stack. 293 BC_INST_REG_STACK_LEN, 294 295 /// Pushes the current length of the results stack onto the results stack. 296 BC_INST_STACK_LEN, 297 298 /// Pushes a copy of the item on the top of the results stack onto the 299 /// results stack. 300 BC_INST_DUPLICATE, 301 302 /// Copies the value in a register and pushes the copy onto the results 303 /// stack. 304 BC_INST_LOAD, 305 306 /// Pops an item off of a register stack and pushes it onto the results 307 /// stack. 308 BC_INST_PUSH_VAR, 309 310 /// Pops an item off of the results stack and pushes it onto a register's 311 /// stack. 312 BC_INST_PUSH_TO_VAR, 313 314 /// Quit. 315 BC_INST_QUIT, 316 317 /// Quit executing some number of strings. 318 BC_INST_NQUIT, 319 320 /// Push the depth of the execution stack onto the stack. 321 BC_INST_EXEC_STACK_LEN, 322 323 #endif // DC_ENABLED 324 325 /// Invalid instruction. 326 BC_INST_INVALID, 327 328 } BcInst; 329 330 #if BC_C11 331 static_assert(BC_INST_INVALID <= UCHAR_MAX, 332 "Too many instructions to fit into an unsigned char"); 333 #endif // BC_C11 334 335 /// Used by maps to identify where items are in the array. 336 typedef struct BcId { 337 338 /// The name of the item. 339 char *name; 340 341 /// The index into the array where the item is. 342 size_t idx; 343 344 } BcId; 345 346 /// The location of a var, array, or array element. 347 typedef struct BcLoc { 348 349 /// The index of the var or array. 350 size_t loc; 351 352 /// The index of the array element. Only used for array elements. 353 size_t idx; 354 355 } BcLoc; 356 357 /// An entry for a constant. 358 typedef struct BcConst { 359 360 /// The original string as parsed from the source code. 361 char *val; 362 363 /// The last base that the constant was parsed in. 364 BcBigDig base; 365 366 /// The parsed constant. 367 BcNum num; 368 369 } BcConst; 370 371 /// A function. This is also used in dc, not just bc. The reason is that strings 372 /// are executed in dc, and they are converted to functions in order to be 373 /// executed. 374 typedef struct BcFunc { 375 376 /// The bytecode instructions. 377 BcVec code; 378 379 #if BC_ENABLED 380 381 /// The labels. This is a vector of indices. The index is the index into 382 /// the bytecode vector where the label is. 383 BcVec labels; 384 385 /// The autos for the function. The first items are the parameters, and the 386 /// arguments to the parameters must match the types in this vector. 387 BcVec autos; 388 389 /// The number of parameters the function takes. 390 size_t nparams; 391 392 #endif // BC_ENABLED 393 394 /// The strings encountered in the function. 395 BcVec strs; 396 397 /// The constants encountered in the function. 398 BcVec consts; 399 400 /// The function's name. 401 const char *name; 402 403 #if BC_ENABLED 404 /// True if the function is a void function. 405 bool voidfn; 406 #endif // BC_ENABLED 407 408 } BcFunc; 409 410 /// Types of results that can be pushed onto the results stack. 411 typedef enum BcResultType { 412 413 /// Result is a variable. 414 BC_RESULT_VAR, 415 416 /// Result is an array element. 417 BC_RESULT_ARRAY_ELEM, 418 419 /// Result is an array. This is only allowed for function arguments or 420 /// returning the length of the array. 421 BC_RESULT_ARRAY, 422 423 /// Result is a string. 424 BC_RESULT_STR, 425 426 /// Result is a temporary. This is used for the result of almost all 427 /// expressions. 428 BC_RESULT_TEMP, 429 430 /// Special casing the two below gave performance improvements. 431 432 /// Result is a 0. 433 BC_RESULT_ZERO, 434 435 /// Result is a 1. Useful for inc/dec operators. 436 BC_RESULT_ONE, 437 438 #if BC_ENABLED 439 440 /// Result is the special "last" variable. 441 BC_RESULT_LAST, 442 443 /// Result is the return value of a void function. 444 BC_RESULT_VOID, 445 #endif // BC_ENABLED 446 447 /// Result is the value of ibase. 448 BC_RESULT_IBASE, 449 450 /// Result is the value of obase. 451 BC_RESULT_OBASE, 452 453 /// Result is the value of scale. 454 BC_RESULT_SCALE, 455 456 #if BC_ENABLE_EXTRA_MATH 457 458 /// Result is the value of seed. 459 BC_RESULT_SEED, 460 461 #endif // BC_ENABLE_EXTRA_MATH 462 463 } BcResultType; 464 465 /// A union to store data for various result types. 466 typedef union BcResultData { 467 468 /// A number. Strings are stored here too; they are numbers with 469 /// cap == 0 && num == NULL. The string's index into the strings vector is 470 /// stored in the scale field. But this is only used for strings stored in 471 /// variables. 472 BcNum n; 473 474 /// A vector. 475 BcVec v; 476 477 /// A variable, array, or array element reference. This could also be a 478 /// string if a string is not stored in a variable (dc only). 479 BcLoc loc; 480 481 } BcResultData; 482 483 /// A tagged union for results. 484 typedef struct BcResult { 485 486 /// The tag. The type of the result. 487 BcResultType t; 488 489 /// The data. The data for the result. 490 BcResultData d; 491 492 } BcResult; 493 494 /// An instruction pointer. This is how bc knows where in the bytecode vector, 495 /// and which function, the current execution is. 496 typedef struct BcInstPtr { 497 498 /// The index of the currently executing function in the fns vector. 499 size_t func; 500 501 /// The index into the bytecode vector of the *next* instruction. 502 size_t idx; 503 504 /// The length of the results vector when this function started executing. 505 /// This is mostly used for bc where functions should not affect the results 506 /// of their callers. 507 size_t len; 508 509 } BcInstPtr; 510 511 /// Types of identifiers. 512 typedef enum BcType { 513 514 /// Variable. 515 BC_TYPE_VAR, 516 517 /// Array. 518 BC_TYPE_ARRAY, 519 520 #if BC_ENABLED 521 522 /// Array reference. 523 BC_TYPE_REF, 524 525 #endif // BC_ENABLED 526 527 } BcType; 528 529 #if BC_ENABLED 530 /// An auto variable in bc. 531 typedef struct BcAuto { 532 533 /// The index of the variable in the vars or arrs vectors. 534 size_t idx; 535 536 /// The type of the variable. 537 BcType type; 538 539 } BcAuto; 540 #endif // BC_ENABLED 541 542 /// Forward declaration. 543 struct BcProgram; 544 545 /** 546 * Initializes a function. 547 * @param f The function to initialize. 548 * @param name The name of the function. The string is assumed to be owned by 549 * some other entity. 550 */ 551 void bc_func_init(BcFunc *f, const char* name); 552 553 /** 554 * Inserts an auto into the function. 555 * @param f The function to insert into. 556 * @param p The program. This is to search for the variable or array name. 557 * @param name The name of the auto to insert. 558 * @param type The type of the auto. 559 * @param line The line in the source code where the insert happened. This is 560 * solely for error reporting. 561 */ 562 void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name, 563 BcType type, size_t line); 564 565 /** 566 * Resets a function in preparation for it to be reused. This can happen in bc 567 * because it is a dynamic language and functions can be redefined. 568 * @param f The functio to reset. 569 */ 570 void bc_func_reset(BcFunc *f); 571 572 #ifndef NDEBUG 573 /** 574 * Frees a function. This is a destructor. This is only used in debug builds 575 * because all functions are freed at exit. We free them in debug builds to 576 * check for memory leaks. 577 * @param func The function to free as a void pointer. 578 */ 579 void bc_func_free(void *func); 580 #endif // NDEBUG 581 582 /** 583 * Initializes an array, which is the array type in bc and dc source code. Since 584 * variables and arrays are both arrays (see the development manual, 585 * manuals/development.md#execution, for more information), the @a nums 586 * parameter tells bc whether to initialize an array of numbers or an array of 587 * arrays of numbers. If the latter, it does a recursive call with nums set to 588 * true. 589 * @param a The array to initialize. 590 * @param nums True if the array should be for numbers, false if it should be 591 * for vectors. 592 */ 593 void bc_array_init(BcVec *a, bool nums); 594 595 /** 596 * Copies an array to another array. This is used to do pass arrays to functions 597 * that do not take references to arrays. The arrays are passed entirely by 598 * value, which means that they need to be copied. 599 * @param d The destination array. 600 * @param s The source array. 601 */ 602 void bc_array_copy(BcVec *d, const BcVec *s); 603 604 /** 605 * Frees a string stored in a function. This is a destructor. 606 * @param string The string to free as a void pointer. 607 */ 608 void bc_string_free(void *string); 609 610 /** 611 * Frees a constant stored in a function. This is a destructor. 612 * @param constant The constant to free as a void pointer. 613 */ 614 void bc_const_free(void *constant); 615 616 /** 617 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by 618 * clearing the BcNum in the union. This is to ensure that bc does not use 619 * uninitialized data. 620 * @param r The result to clear. 621 */ 622 void bc_result_clear(BcResult *r); 623 624 /** 625 * Copies a result into another. This is done for things like duplicating the 626 * top of the results stack or copying the result of an assignment to put back 627 * on the results stack. 628 * @param d The destination result. 629 * @param src The source result. 630 */ 631 void bc_result_copy(BcResult *d, BcResult *src); 632 633 /** 634 * Frees a result. This is a destructor. 635 * @param result The result to free as a void pointer. 636 */ 637 void bc_result_free(void *result); 638 639 /** 640 * Expands an array to @a len. This can happen because in bc, you do not have to 641 * explicitly initialize elements of an array. If you access an element that is 642 * not initialized, the array is expanded to fit it, and all missing elements 643 * are initialized to 0 if they are numbers, or arrays with one element of 0. 644 * This function does that expansion. 645 * @param a The array to expand. 646 * @param len The length to expand to. 647 */ 648 void bc_array_expand(BcVec *a, size_t len); 649 650 /** 651 * Compare two BcId's and return the result. Since they are just comparing the 652 * names in the BcId, I return the result from strcmp() exactly. This is used by 653 * maps in their binary search. 654 * @param e1 The first id. 655 * @param e2 The second id. 656 * @return The result of strcmp() on the BcId's names. 657 */ 658 int bc_id_cmp(const BcId *e1, const BcId *e2); 659 660 #if BC_ENABLED 661 662 /** 663 * Returns non-zero if the bytecode instruction i is an assignment instruction. 664 * @param i The instruction to test. 665 * @return Non-zero if i is an assignment instruction, zero otherwise. 666 */ 667 #define BC_INST_IS_ASSIGN(i) \ 668 ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) 669 670 /** 671 * Returns true if the bytecode instruction @a i requires the value to be 672 * returned for use. 673 * @param i The instruction to test. 674 * @return True if @a i requires the value to be returned for use, false 675 * otherwise. 676 */ 677 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) 678 679 #else // BC_ENABLED 680 681 /** 682 * Returns non-zero if the bytecode instruction i is an assignment instruction. 683 * @param i The instruction to test. 684 * @return Non-zero if i is an assignment instruction, zero otherwise. 685 */ 686 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) 687 688 /** 689 * Returns true if the bytecode instruction @a i requires the value to be 690 * returned for use. 691 * @param i The instruction to test. 692 * @return True if @a i requires the value to be returned for use, false 693 * otherwise. 694 */ 695 #define BC_INST_USE_VAL(i) (false) 696 697 #endif // BC_ENABLED 698 699 #if BC_DEBUG_CODE 700 /// Reference to string names for all of the instructions. For debugging. 701 extern const char* bc_inst_names[]; 702 #endif // BC_DEBUG_CODE 703 704 /// References to the names of the main and read functions. 705 extern const char bc_func_main[]; 706 extern const char bc_func_read[]; 707 708 #endif // BC_LANG_H 709