1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for program data. 33 * 34 */ 35 36 #ifndef BC_LANG_H 37 #define BC_LANG_H 38 39 #include <stdbool.h> 40 41 #include <status.h> 42 #include <vector.h> 43 #include <num.h> 44 45 /// The instructions for bytecode. 46 typedef enum BcInst { 47 48 #if BC_ENABLED 49 50 /// Postfix increment and decrement. Prefix are translated into 51 /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. 52 BC_INST_INC = 0, 53 BC_INST_DEC, 54 #endif // BC_ENABLED 55 56 /// Unary negation. 57 BC_INST_NEG, 58 59 /// Boolean not. 60 BC_INST_BOOL_NOT, 61 #if BC_ENABLE_EXTRA_MATH 62 /// Truncation operator. 63 BC_INST_TRUNC, 64 #endif // BC_ENABLE_EXTRA_MATH 65 66 /// These should be self-explanatory. 67 BC_INST_POWER, 68 BC_INST_MULTIPLY, 69 BC_INST_DIVIDE, 70 BC_INST_MODULUS, 71 BC_INST_PLUS, 72 BC_INST_MINUS, 73 74 #if BC_ENABLE_EXTRA_MATH 75 76 /// Places operator. 77 BC_INST_PLACES, 78 79 /// Shift operators. 80 BC_INST_LSHIFT, 81 BC_INST_RSHIFT, 82 #endif // BC_ENABLE_EXTRA_MATH 83 84 /// Comparison operators. 85 BC_INST_REL_EQ, 86 BC_INST_REL_LE, 87 BC_INST_REL_GE, 88 BC_INST_REL_NE, 89 BC_INST_REL_LT, 90 BC_INST_REL_GT, 91 92 /// Boolean or and and. 93 BC_INST_BOOL_OR, 94 BC_INST_BOOL_AND, 95 96 #if BC_ENABLED 97 /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. 98 BC_INST_ASSIGN_POWER, 99 BC_INST_ASSIGN_MULTIPLY, 100 BC_INST_ASSIGN_DIVIDE, 101 BC_INST_ASSIGN_MODULUS, 102 BC_INST_ASSIGN_PLUS, 103 BC_INST_ASSIGN_MINUS, 104 #if BC_ENABLE_EXTRA_MATH 105 /// Places and shift assignment operators. 106 BC_INST_ASSIGN_PLACES, 107 BC_INST_ASSIGN_LSHIFT, 108 BC_INST_ASSIGN_RSHIFT, 109 #endif // BC_ENABLE_EXTRA_MATH 110 111 /// Normal assignment. 112 BC_INST_ASSIGN, 113 114 /// bc and dc detect when the value from an assignment is not necessary. 115 /// For example, a plain assignment statement means the value is never used. 116 /// In those cases, we can get lots of performance back by not even creating 117 /// a copy at all. In fact, it saves a copy, a push onto the results stack, 118 /// a pop from the results stack, and a free. Definitely worth it to detect. 119 BC_INST_ASSIGN_POWER_NO_VAL, 120 BC_INST_ASSIGN_MULTIPLY_NO_VAL, 121 BC_INST_ASSIGN_DIVIDE_NO_VAL, 122 BC_INST_ASSIGN_MODULUS_NO_VAL, 123 BC_INST_ASSIGN_PLUS_NO_VAL, 124 BC_INST_ASSIGN_MINUS_NO_VAL, 125 #if BC_ENABLE_EXTRA_MATH 126 /// Same as above. 127 BC_INST_ASSIGN_PLACES_NO_VAL, 128 BC_INST_ASSIGN_LSHIFT_NO_VAL, 129 BC_INST_ASSIGN_RSHIFT_NO_VAL, 130 #endif // BC_ENABLE_EXTRA_MATH 131 #endif // BC_ENABLED 132 133 /// Normal assignment that pushes no value on the stack. 134 BC_INST_ASSIGN_NO_VAL, 135 136 /// Push a constant onto the results stack. 137 BC_INST_NUM, 138 139 /// Push a variable onto the results stack. 140 BC_INST_VAR, 141 142 /// Push an array element onto the results stack. 143 BC_INST_ARRAY_ELEM, 144 145 /// Push an array onto the results stack. This is different from pushing an 146 /// array *element* onto the results stack; it pushes a reference to the 147 /// whole array. This is needed in bc for function arguments that are 148 /// arrays. It is also needed for returning the length of an array. 149 BC_INST_ARRAY, 150 151 /// Push a zero or a one onto the stack. These are special cased because it 152 /// does help performance, particularly for one since inc/dec operators 153 /// use it. 154 BC_INST_ZERO, 155 BC_INST_ONE, 156 157 #if BC_ENABLED 158 /// Push the last printed value onto the stack. 159 BC_INST_LAST, 160 #endif // BC_ENABLED 161 162 /// Push the value of any of the globals onto the stack. 163 BC_INST_IBASE, 164 BC_INST_OBASE, 165 BC_INST_SCALE, 166 167 #if BC_ENABLE_EXTRA_MATH 168 /// Push the value of the seed global onto the stack. 169 BC_INST_SEED, 170 #endif // BC_ENABLE_EXTRA_MATH 171 172 /// These are builtin functions. 173 BC_INST_LENGTH, 174 BC_INST_SCALE_FUNC, 175 BC_INST_SQRT, 176 BC_INST_ABS, 177 178 #if BC_ENABLE_EXTRA_MATH 179 /// Another builtin function. 180 BC_INST_IRAND, 181 #endif // BC_ENABLE_EXTRA_MATH 182 183 /// Asciify. 184 BC_INST_ASCIIFY, 185 186 /// Another builtin function. 187 BC_INST_READ, 188 189 #if BC_ENABLE_EXTRA_MATH 190 /// Another builtin function. 191 BC_INST_RAND, 192 #endif // BC_ENABLE_EXTRA_MATH 193 194 /// Return the max for the various globals. 195 BC_INST_MAXIBASE, 196 BC_INST_MAXOBASE, 197 BC_INST_MAXSCALE, 198 #if BC_ENABLE_EXTRA_MATH 199 /// Return the max value returned by rand(). 200 BC_INST_MAXRAND, 201 #endif // BC_ENABLE_EXTRA_MATH 202 203 /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. 204 /// dc uses this instruction to print, but not pop. That's valid in dc. 205 /// However, in bc, it is *never* valid to print without popping. In bc, 206 /// BC_INST_PRINT_POP is used to indicate when a string should be printed 207 /// because of a print statement or whether it should be printed raw. The 208 /// reason for this is because a print statement handles escaped characters. 209 /// So BC_INST_PRINT_POP is for printing a string from a print statement, 210 /// BC_INST_PRINT_STR is for printing a string by itself. 211 /// 212 /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. 213 /// 214 /// Oh, and BC_INST_STR pushes a string onto the results stack. 215 BC_INST_PRINT, 216 BC_INST_PRINT_POP, 217 BC_INST_STR, 218 #if BC_ENABLED 219 BC_INST_PRINT_STR, 220 221 /// Jumps unconditionally. 222 BC_INST_JUMP, 223 224 /// Jumps if the top of the results stack is zero (condition failed). It 225 /// turns out that we only want to jump when conditions fail to "skip" code. 226 BC_INST_JUMP_ZERO, 227 228 /// Call a function. 229 BC_INST_CALL, 230 231 /// Return the top of the stack to the caller. 232 BC_INST_RET, 233 234 /// Return 0 to the caller. 235 BC_INST_RET0, 236 237 /// Special return instruction for void functions. 238 BC_INST_RET_VOID, 239 240 /// Special halt instruction. 241 BC_INST_HALT, 242 #endif // BC_ENABLED 243 244 /// Pop an item off of the results stack. 245 BC_INST_POP, 246 247 /// Swaps the top two items on the results stack. 248 BC_INST_SWAP, 249 250 /// Modular exponentiation. 251 BC_INST_MODEXP, 252 253 /// Do divide and modulus at the same time. 254 BC_INST_DIVMOD, 255 256 /// Turns a number into a string and prints it. 257 BC_INST_PRINT_STREAM, 258 259 #if DC_ENABLED 260 261 /// dc's return; it pops an executing string off of the stack. 262 BC_INST_POP_EXEC, 263 264 /// Unconditionally execute a string. 265 BC_INST_EXECUTE, 266 267 /// Conditionally execute a string. 268 BC_INST_EXEC_COND, 269 270 /// Prints each item on the results stack, separated by newlines. 271 BC_INST_PRINT_STACK, 272 273 /// Pops everything off of the results stack. 274 BC_INST_CLEAR_STACK, 275 276 /// Pushes the current length of a register stack onto the results stack. 277 BC_INST_REG_STACK_LEN, 278 279 /// Pushes the current length of the results stack onto the results stack. 280 BC_INST_STACK_LEN, 281 282 /// Pushes a copy of the item on the top of the results stack onto the 283 /// results stack. 284 BC_INST_DUPLICATE, 285 286 /// Copies the value in a register and pushes the copy onto the results 287 /// stack. 288 BC_INST_LOAD, 289 290 /// Pops an item off of a register stack and pushes it onto the results 291 /// stack. 292 BC_INST_PUSH_VAR, 293 294 /// Pops an item off of the results stack and pushes it onto a register's 295 /// stack. 296 BC_INST_PUSH_TO_VAR, 297 298 /// Quit. 299 BC_INST_QUIT, 300 301 /// Quit executing some number of strings. 302 BC_INST_NQUIT, 303 304 /// Push the depth of the execution stack onto the stack. 305 BC_INST_EXEC_STACK_LEN, 306 307 #endif // DC_ENABLED 308 309 /// Invalid instruction. 310 BC_INST_INVALID, 311 312 } BcInst; 313 314 /// Used by maps to identify where items are in the array. 315 typedef struct BcId { 316 317 /// The name of the item. 318 char *name; 319 320 /// The index into the array where the item is. 321 size_t idx; 322 323 } BcId; 324 325 /// The location of a var, array, or array element. 326 typedef struct BcLoc { 327 328 /// The index of the var or array. 329 size_t loc; 330 331 /// The index of the array element. Only used for array elements. 332 size_t idx; 333 334 } BcLoc; 335 336 /// An entry for a constant. 337 typedef struct BcConst { 338 339 /// The original string as parsed from the source code. 340 char *val; 341 342 /// The last base that the constant was parsed in. 343 BcBigDig base; 344 345 /// The parsed constant. 346 BcNum num; 347 348 } BcConst; 349 350 /// A function. This is also used in dc, not just bc. The reason is that strings 351 /// are executed in dc, and they are converted to functions in order to be 352 /// executed. 353 typedef struct BcFunc { 354 355 /// The bytecode instructions. 356 BcVec code; 357 358 #if BC_ENABLED 359 360 /// The labels. This is a vector of indices. The index is the index into 361 /// the bytecode vector where the label is. 362 BcVec labels; 363 364 /// The autos for the function. The first items are the parameters, and the 365 /// arguments to the parameters must match the types in this vector. 366 BcVec autos; 367 368 /// The number of parameters the function takes. 369 size_t nparams; 370 371 #endif // BC_ENABLED 372 373 /// The strings encountered in the function. 374 BcVec strs; 375 376 /// The constants encountered in the function. 377 BcVec consts; 378 379 /// The function's name. 380 const char *name; 381 382 #if BC_ENABLED 383 /// True if the function is a void function. 384 bool voidfn; 385 #endif // BC_ENABLED 386 387 } BcFunc; 388 389 /// Types of results that can be pushed onto the results stack. 390 typedef enum BcResultType { 391 392 /// Result is a variable. 393 BC_RESULT_VAR, 394 395 /// Result is an array element. 396 BC_RESULT_ARRAY_ELEM, 397 398 /// Result is an array. This is only allowed for function arguments or 399 /// returning the length of the array. 400 BC_RESULT_ARRAY, 401 402 /// Result is a string. 403 BC_RESULT_STR, 404 405 /// Result is a temporary. This is used for the result of almost all 406 /// expressions. 407 BC_RESULT_TEMP, 408 409 /// Special casing the two below gave performance improvements. 410 411 /// Result is a 0. 412 BC_RESULT_ZERO, 413 414 /// Result is a 1. Useful for inc/dec operators. 415 BC_RESULT_ONE, 416 417 #if BC_ENABLED 418 419 /// Result is the special "last" variable. 420 BC_RESULT_LAST, 421 422 /// Result is the return value of a void function. 423 BC_RESULT_VOID, 424 #endif // BC_ENABLED 425 426 /// Result is the value of ibase. 427 BC_RESULT_IBASE, 428 429 /// Result is the value of obase. 430 BC_RESULT_OBASE, 431 432 /// Result is the value of scale. 433 BC_RESULT_SCALE, 434 435 #if BC_ENABLE_EXTRA_MATH 436 437 /// Result is the value of seed. 438 BC_RESULT_SEED, 439 440 #endif // BC_ENABLE_EXTRA_MATH 441 442 } BcResultType; 443 444 /// A union to store data for various result types. 445 typedef union BcResultData { 446 447 /// A number. Strings are stored here too; they are numbers with 448 /// cap == 0 && num == NULL. The string's index into the strings vector is 449 /// stored in the scale field. But this is only used for strings stored in 450 /// variables. 451 BcNum n; 452 453 /// A vector. 454 BcVec v; 455 456 /// A variable, array, or array element reference. This could also be a 457 /// string if a string is not stored in a variable (dc only). 458 BcLoc loc; 459 460 } BcResultData; 461 462 /// A tagged union for results. 463 typedef struct BcResult { 464 465 /// The tag. The type of the result. 466 BcResultType t; 467 468 /// The data. The data for the result. 469 BcResultData d; 470 471 } BcResult; 472 473 /// An instruction pointer. This is how bc knows where in the bytecode vector, 474 /// and which function, the current execution is. 475 typedef struct BcInstPtr { 476 477 /// The index of the currently executing function in the fns vector. 478 size_t func; 479 480 /// The index into the bytecode vector of the *next* instruction. 481 size_t idx; 482 483 /// The length of the results vector when this function started executing. 484 /// This is mostly used for bc where functions should not affect the results 485 /// of their callers. 486 size_t len; 487 488 } BcInstPtr; 489 490 /// Types of identifiers. 491 typedef enum BcType { 492 493 /// Variable. 494 BC_TYPE_VAR, 495 496 /// Array. 497 BC_TYPE_ARRAY, 498 499 #if BC_ENABLED 500 501 /// Array reference. 502 BC_TYPE_REF, 503 504 #endif // BC_ENABLED 505 506 } BcType; 507 508 #if BC_ENABLED 509 /// An auto variable in bc. 510 typedef struct BcAuto { 511 512 /// The index of the variable in the vars or arrs vectors. 513 size_t idx; 514 515 /// The type of the variable. 516 BcType type; 517 518 } BcAuto; 519 #endif // BC_ENABLED 520 521 /// Forward declaration. 522 struct BcProgram; 523 524 /** 525 * Initializes a function. 526 * @param f The function to initialize. 527 * @param name The name of the function. The string is assumed to be owned by 528 * some other entity. 529 */ 530 void bc_func_init(BcFunc *f, const char* name); 531 532 /** 533 * Inserts an auto into the function. 534 * @param f The function to insert into. 535 * @param p The program. This is to search for the variable or array name. 536 * @param name The name of the auto to insert. 537 * @param type The type of the auto. 538 * @param line The line in the source code where the insert happened. This is 539 * solely for error reporting. 540 */ 541 void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name, 542 BcType type, size_t line); 543 544 /** 545 * Resets a function in preparation for it to be reused. This can happen in bc 546 * because it is a dynamic language and functions can be redefined. 547 * @param f The functio to reset. 548 */ 549 void bc_func_reset(BcFunc *f); 550 551 #ifndef NDEBUG 552 /** 553 * Frees a function. This is a destructor. This is only used in debug builds 554 * because all functions are freed at exit. We free them in debug builds to 555 * check for memory leaks. 556 * @param func The function to free as a void pointer. 557 */ 558 void bc_func_free(void *func); 559 #endif // NDEBUG 560 561 /** 562 * Initializes an array, which is the array type in bc and dc source code. Since 563 * variables and arrays are both arrays (see the development manual, 564 * manuals/development.md#execution, for more information), the @a nums 565 * parameter tells bc whether to initialize an array of numbers or an array of 566 * arrays of numbers. If the latter, it does a recursive call with nums set to 567 * true. 568 * @param a The array to initialize. 569 * @param nums True if the array should be for numbers, false if it should be 570 * for vectors. 571 */ 572 void bc_array_init(BcVec *a, bool nums); 573 574 /** 575 * Copies an array to another array. This is used to do pass arrays to functions 576 * that do not take references to arrays. The arrays are passed entirely by 577 * value, which means that they need to be copied. 578 * @param d The destination array. 579 * @param s The source array. 580 */ 581 void bc_array_copy(BcVec *d, const BcVec *s); 582 583 /** 584 * Frees a string stored in a function. This is a destructor. 585 * @param string The string to free as a void pointer. 586 */ 587 void bc_string_free(void *string); 588 589 /** 590 * Frees a constant stored in a function. This is a destructor. 591 * @param constant The constant to free as a void pointer. 592 */ 593 void bc_const_free(void *constant); 594 595 /** 596 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by 597 * clearing the BcNum in the union. This is to ensure that bc does not use 598 * uninitialized data. 599 * @param r The result to clear. 600 */ 601 void bc_result_clear(BcResult *r); 602 603 /** 604 * Copies a result into another. This is done for things like duplicating the 605 * top of the results stack or copying the result of an assignment to put back 606 * on the results stack. 607 * @param d The destination result. 608 * @param src The source result. 609 */ 610 void bc_result_copy(BcResult *d, BcResult *src); 611 612 /** 613 * Frees a result. This is a destructor. 614 * @param result The result to free as a void pointer. 615 */ 616 void bc_result_free(void *result); 617 618 /** 619 * Expands an array to @a len. This can happen because in bc, you do not have to 620 * explicitly initialize elements of an array. If you access an element that is 621 * not initialized, the array is expanded to fit it, and all missing elements 622 * are initialized to 0 if they are numbers, or arrays with one element of 0. 623 * This function does that expansion. 624 * @param a The array to expand. 625 * @param len The length to expand to. 626 */ 627 void bc_array_expand(BcVec *a, size_t len); 628 629 /** 630 * Compare two BcId's and return the result. Since they are just comparing the 631 * names in the BcId, I return the result from strcmp() exactly. This is used by 632 * maps in their binary search. 633 * @param e1 The first id. 634 * @param e2 The second id. 635 * @return The result of strcmp() on the BcId's names. 636 */ 637 int bc_id_cmp(const BcId *e1, const BcId *e2); 638 639 #if BC_ENABLED 640 641 /** 642 * Returns non-zero if the bytecode instruction i is an assignment instruction. 643 * @param i The instruction to test. 644 * @return Non-zero if i is an assignment instruction, zero otherwise. 645 */ 646 #define BC_INST_IS_ASSIGN(i) \ 647 ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) 648 649 /** 650 * Returns true if the bytecode instruction @a i requires the value to be 651 * returned for use. 652 * @param i The instruction to test. 653 * @return True if @a i requires the value to be returned for use, false 654 * otherwise. 655 */ 656 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) 657 658 #else // BC_ENABLED 659 660 /** 661 * Returns non-zero if the bytecode instruction i is an assignment instruction. 662 * @param i The instruction to test. 663 * @return Non-zero if i is an assignment instruction, zero otherwise. 664 */ 665 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) 666 667 /** 668 * Returns true if the bytecode instruction @a i requires the value to be 669 * returned for use. 670 * @param i The instruction to test. 671 * @return True if @a i requires the value to be returned for use, false 672 * otherwise. 673 */ 674 #define BC_INST_USE_VAL(i) (false) 675 676 #endif // BC_ENABLED 677 678 #if BC_DEBUG_CODE 679 /// Reference to string names for all of the instructions. For debugging. 680 extern const char* bc_inst_names[]; 681 #endif // BC_DEBUG_CODE 682 683 /// References to the names of the main and read functions. 684 extern const char bc_func_main[]; 685 extern const char bc_func_read[]; 686 687 #endif // BC_LANG_H 688