xref: /freebsd/contrib/bc/include/lang.h (revision 6549718b70f0e660a15685369afb4f9caf2215ce)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2024 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for program data.
33  *
34  */
35 
36 #ifndef BC_LANG_H
37 #define BC_LANG_H
38 
39 #include <stdbool.h>
40 
41 // These have to come first to silence a warning on BC_C11 below.
42 #include <status.h>
43 #include <vector.h>
44 #include <num.h>
45 
46 #if BC_C11
47 #include <assert.h>
48 #endif // BC_C11
49 
50 /// The instructions for bytecode.
51 typedef enum BcInst
52 {
53 #if BC_ENABLED
54 	/// Postfix increment and decrement. Prefix are translated into
55 	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
56 	BC_INST_INC = 0,
57 	BC_INST_DEC,
58 #endif // BC_ENABLED
59 
60 	/// Unary negation.
61 	BC_INST_NEG,
62 
63 	/// Boolean not.
64 	BC_INST_BOOL_NOT,
65 
66 #if BC_ENABLE_EXTRA_MATH
67 	/// Truncation operator.
68 	BC_INST_TRUNC,
69 #endif // BC_ENABLE_EXTRA_MATH
70 
71 	/// These should be self-explanatory.
72 	BC_INST_POWER,
73 	BC_INST_MULTIPLY,
74 	BC_INST_DIVIDE,
75 	BC_INST_MODULUS,
76 	BC_INST_PLUS,
77 	BC_INST_MINUS,
78 
79 #if BC_ENABLE_EXTRA_MATH
80 	/// Places operator.
81 	BC_INST_PLACES,
82 
83 	/// Shift operators.
84 	BC_INST_LSHIFT,
85 	BC_INST_RSHIFT,
86 #endif // BC_ENABLE_EXTRA_MATH
87 
88 	/// Comparison operators.
89 	BC_INST_REL_EQ,
90 	BC_INST_REL_LE,
91 	BC_INST_REL_GE,
92 	BC_INST_REL_NE,
93 	BC_INST_REL_LT,
94 	BC_INST_REL_GT,
95 
96 	/// Boolean or and and.
97 	BC_INST_BOOL_OR,
98 	BC_INST_BOOL_AND,
99 
100 #if BC_ENABLED
101 	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
102 	BC_INST_ASSIGN_POWER,
103 	BC_INST_ASSIGN_MULTIPLY,
104 	BC_INST_ASSIGN_DIVIDE,
105 	BC_INST_ASSIGN_MODULUS,
106 	BC_INST_ASSIGN_PLUS,
107 	BC_INST_ASSIGN_MINUS,
108 #if BC_ENABLE_EXTRA_MATH
109 	/// Places and shift assignment operators.
110 	BC_INST_ASSIGN_PLACES,
111 	BC_INST_ASSIGN_LSHIFT,
112 	BC_INST_ASSIGN_RSHIFT,
113 #endif // BC_ENABLE_EXTRA_MATH
114 
115 	/// Normal assignment.
116 	BC_INST_ASSIGN,
117 
118 	/// bc and dc detect when the value from an assignment is not necessary.
119 	/// For example, a plain assignment statement means the value is never used.
120 	/// In those cases, we can get lots of performance back by not even creating
121 	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
122 	/// a pop from the results stack, and a free. Definitely worth it to detect.
123 	BC_INST_ASSIGN_POWER_NO_VAL,
124 	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
125 	BC_INST_ASSIGN_DIVIDE_NO_VAL,
126 	BC_INST_ASSIGN_MODULUS_NO_VAL,
127 	BC_INST_ASSIGN_PLUS_NO_VAL,
128 	BC_INST_ASSIGN_MINUS_NO_VAL,
129 #if BC_ENABLE_EXTRA_MATH
130 	/// Same as above.
131 	BC_INST_ASSIGN_PLACES_NO_VAL,
132 	BC_INST_ASSIGN_LSHIFT_NO_VAL,
133 	BC_INST_ASSIGN_RSHIFT_NO_VAL,
134 #endif // BC_ENABLE_EXTRA_MATH
135 #endif // BC_ENABLED
136 
137 	/// Normal assignment that pushes no value on the stack.
138 	BC_INST_ASSIGN_NO_VAL,
139 
140 	/// Push a constant onto the results stack.
141 	BC_INST_NUM,
142 
143 	/// Push a variable onto the results stack.
144 	BC_INST_VAR,
145 
146 	/// Push an array element onto the results stack.
147 	BC_INST_ARRAY_ELEM,
148 
149 	/// Push an array onto the results stack. This is different from pushing an
150 	/// array *element* onto the results stack; it pushes a reference to the
151 	/// whole array. This is needed in bc for function arguments that are
152 	/// arrays. It is also needed for returning the length of an array.
153 	BC_INST_ARRAY,
154 
155 	/// Push a zero or a one onto the stack. These are special cased because it
156 	/// does help performance, particularly for one since inc/dec operators
157 	/// use it.
158 	BC_INST_ZERO,
159 	BC_INST_ONE,
160 
161 #if BC_ENABLED
162 	/// Push the last printed value onto the stack.
163 	BC_INST_LAST,
164 #endif // BC_ENABLED
165 
166 	/// Push the value of any of the globals onto the stack.
167 	BC_INST_IBASE,
168 	BC_INST_OBASE,
169 	BC_INST_SCALE,
170 
171 #if BC_ENABLE_EXTRA_MATH
172 	/// Push the value of the seed global onto the stack.
173 	BC_INST_SEED,
174 #endif // BC_ENABLE_EXTRA_MATH
175 
176 	/// These are builtin functions.
177 	BC_INST_LENGTH,
178 	BC_INST_SCALE_FUNC,
179 	BC_INST_SQRT,
180 	BC_INST_ABS,
181 	BC_INST_IS_NUMBER,
182 	BC_INST_IS_STRING,
183 
184 #if BC_ENABLE_EXTRA_MATH
185 	/// Another builtin function.
186 	BC_INST_IRAND,
187 #endif // BC_ENABLE_EXTRA_MATH
188 
189 	/// Asciify.
190 	BC_INST_ASCIIFY,
191 
192 	/// Another builtin function.
193 	BC_INST_READ,
194 
195 #if BC_ENABLE_EXTRA_MATH
196 	/// Another builtin function.
197 	BC_INST_RAND,
198 #endif // BC_ENABLE_EXTRA_MATH
199 
200 	/// Return the max for the various globals.
201 	BC_INST_MAXIBASE,
202 	BC_INST_MAXOBASE,
203 	BC_INST_MAXSCALE,
204 #if BC_ENABLE_EXTRA_MATH
205 	/// Return the max value returned by rand().
206 	BC_INST_MAXRAND,
207 #endif // BC_ENABLE_EXTRA_MATH
208 
209 	/// bc line_length() builtin function.
210 	BC_INST_LINE_LENGTH,
211 
212 #if BC_ENABLED
213 
214 	/// bc global_stacks() builtin function.
215 	BC_INST_GLOBAL_STACKS,
216 
217 #endif // BC_ENABLED
218 
219 	/// bc leading_zero() builtin function.
220 	BC_INST_LEADING_ZERO,
221 
222 	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
223 	/// dc uses this instruction to print, but not pop. That's valid in dc.
224 	/// However, in bc, it is *never* valid to print without popping. In bc,
225 	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
226 	/// because of a print statement or whether it should be printed raw. The
227 	/// reason for this is because a print statement handles escaped characters.
228 	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
229 	/// BC_INST_PRINT_STR is for printing a string by itself.
230 	///
231 	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
232 	///
233 	/// Oh, and BC_INST_STR pushes a string onto the results stack.
234 	BC_INST_PRINT,
235 	BC_INST_PRINT_POP,
236 	BC_INST_STR,
237 #if BC_ENABLED
238 	BC_INST_PRINT_STR,
239 
240 	/// Jumps unconditionally.
241 	BC_INST_JUMP,
242 
243 	/// Jumps if the top of the results stack is zero (condition failed). It
244 	/// turns out that we only want to jump when conditions fail to "skip" code.
245 	BC_INST_JUMP_ZERO,
246 
247 	/// Call a function.
248 	BC_INST_CALL,
249 
250 	/// Return the top of the stack to the caller.
251 	BC_INST_RET,
252 
253 	/// Return 0 to the caller.
254 	BC_INST_RET0,
255 
256 	/// Special return instruction for void functions.
257 	BC_INST_RET_VOID,
258 
259 	/// Special halt instruction.
260 	BC_INST_HALT,
261 #endif // BC_ENABLED
262 
263 	/// Pop an item off of the results stack.
264 	BC_INST_POP,
265 
266 	/// Swaps the top two items on the results stack.
267 	BC_INST_SWAP,
268 
269 	/// Modular exponentiation.
270 	BC_INST_MODEXP,
271 
272 	/// Do divide and modulus at the same time.
273 	BC_INST_DIVMOD,
274 
275 	/// Turns a number into a string and prints it.
276 	BC_INST_PRINT_STREAM,
277 
278 #if DC_ENABLED
279 
280 	/// dc extended registers command.
281 	BC_INST_EXTENDED_REGISTERS,
282 
283 	/// dc's return; it pops an executing string off of the stack.
284 	BC_INST_POP_EXEC,
285 
286 	/// Unconditionally execute a string.
287 	BC_INST_EXECUTE,
288 
289 	/// Conditionally execute a string.
290 	BC_INST_EXEC_COND,
291 
292 	/// Prints each item on the results stack, separated by newlines.
293 	BC_INST_PRINT_STACK,
294 
295 	/// Pops everything off of the results stack.
296 	BC_INST_CLEAR_STACK,
297 
298 	/// Pushes the current length of a register stack onto the results stack.
299 	BC_INST_REG_STACK_LEN,
300 
301 	/// Pushes the current length of the results stack onto the results stack.
302 	BC_INST_STACK_LEN,
303 
304 	/// Pushes a copy of the item on the top of the results stack onto the
305 	/// results stack.
306 	BC_INST_DUPLICATE,
307 
308 	/// Copies the value in a register and pushes the copy onto the results
309 	/// stack.
310 	BC_INST_LOAD,
311 
312 	/// Pops an item off of a register stack and pushes it onto the results
313 	/// stack.
314 	BC_INST_PUSH_VAR,
315 
316 	/// Pops an item off of the results stack and pushes it onto a register's
317 	/// stack.
318 	BC_INST_PUSH_TO_VAR,
319 
320 	/// Quit.
321 	BC_INST_QUIT,
322 
323 	/// Quit executing some number of strings.
324 	BC_INST_NQUIT,
325 
326 	/// Push the depth of the execution stack onto the stack.
327 	BC_INST_EXEC_STACK_LEN,
328 
329 #endif // DC_ENABLED
330 
331 	/// Invalid instruction.
332 	BC_INST_INVALID,
333 
334 } BcInst;
335 
336 #if BC_C11
337 _Static_assert(BC_INST_INVALID <= UCHAR_MAX,
338                "Too many instructions to fit into an unsigned char");
339 #endif // BC_C11
340 
341 /// Used by maps to identify where items are in the array.
342 typedef struct BcId
343 {
344 	/// The name of the item.
345 	char* name;
346 
347 	/// The index into the array where the item is.
348 	size_t idx;
349 
350 } BcId;
351 
352 /// The location of a var, array, or array element.
353 typedef struct BcLoc
354 {
355 	/// The index of the var or array.
356 	size_t loc;
357 
358 	/// The index of the array or variable in the array stack. This is to
359 	/// prevent a bug with getting the wrong array element or variable after a
360 	/// function call. See the tests/bc/scripts/array.bc test for the array
361 	/// case; the variable case is in various variable tests.
362 	size_t stack_idx;
363 
364 	/// The index of the array element. Only used for array elements.
365 	size_t idx;
366 
367 } BcLoc;
368 
369 /// An entry for a constant.
370 typedef struct BcConst
371 {
372 	/// The original string as parsed from the source code.
373 	char* val;
374 
375 	/// The last base that the constant was parsed in.
376 	BcBigDig base;
377 
378 	/// The parsed constant.
379 	BcNum num;
380 
381 } BcConst;
382 
383 /// A function. This is also used in dc, not just bc. The reason is that strings
384 /// are executed in dc, and they are converted to functions in order to be
385 /// executed.
386 typedef struct BcFunc
387 {
388 	/// The bytecode instructions.
389 	BcVec code;
390 
391 #if BC_ENABLED
392 
393 	/// The labels. This is a vector of indices. The index is the index into
394 	/// the bytecode vector where the label is.
395 	BcVec labels;
396 
397 	/// The autos for the function. The first items are the parameters, and the
398 	/// arguments to the parameters must match the types in this vector.
399 	BcVec autos;
400 
401 	/// The number of parameters the function takes.
402 	size_t nparams;
403 
404 #endif // BC_ENABLED
405 
406 	/// The function's name.
407 	const char* name;
408 
409 #if BC_ENABLED
410 	/// True if the function is a void function.
411 	bool voidfn;
412 #endif // BC_ENABLED
413 
414 } BcFunc;
415 
416 /// Types of results that can be pushed onto the results stack.
417 typedef enum BcResultType
418 {
419 	/// Result is a variable.
420 	BC_RESULT_VAR,
421 
422 	/// Result is an array element.
423 	BC_RESULT_ARRAY_ELEM,
424 
425 	/// Result is an array. This is only allowed for function arguments or
426 	/// returning the length of the array.
427 	BC_RESULT_ARRAY,
428 
429 	/// Result is a string.
430 	BC_RESULT_STR,
431 
432 	/// Result is a temporary. This is used for the result of almost all
433 	/// expressions.
434 	BC_RESULT_TEMP,
435 
436 	/// Special casing the two below gave performance improvements.
437 
438 	/// Result is a 0.
439 	BC_RESULT_ZERO,
440 
441 	/// Result is a 1. Useful for inc/dec operators.
442 	BC_RESULT_ONE,
443 
444 #if BC_ENABLED
445 
446 	/// Result is the special "last" variable.
447 	BC_RESULT_LAST,
448 
449 	/// Result is the return value of a void function.
450 	BC_RESULT_VOID,
451 #endif // BC_ENABLED
452 
453 	/// Result is the value of ibase.
454 	BC_RESULT_IBASE,
455 
456 	/// Result is the value of obase.
457 	BC_RESULT_OBASE,
458 
459 	/// Result is the value of scale.
460 	BC_RESULT_SCALE,
461 
462 #if BC_ENABLE_EXTRA_MATH
463 
464 	/// Result is the value of seed.
465 	BC_RESULT_SEED,
466 
467 #endif // BC_ENABLE_EXTRA_MATH
468 
469 } BcResultType;
470 
471 /// A union to store data for various result types.
472 typedef union BcResultData
473 {
474 	/// A number. Strings are stored here too; they are numbers with
475 	/// cap == 0 && num == NULL. The string's index into the strings vector is
476 	/// stored in the scale field. But this is only used for strings stored in
477 	/// variables.
478 	BcNum n;
479 
480 	/// A vector.
481 	BcVec v;
482 
483 	/// A variable, array, or array element reference. This could also be a
484 	/// string if a string is not stored in a variable (dc only).
485 	BcLoc loc;
486 
487 } BcResultData;
488 
489 /// A tagged union for results.
490 typedef struct BcResult
491 {
492 	/// The tag. The type of the result.
493 	BcResultType t;
494 
495 	/// The data. The data for the result.
496 	BcResultData d;
497 
498 } BcResult;
499 
500 /// An instruction pointer. This is how bc knows where in the bytecode vector,
501 /// and which function, the current execution is.
502 typedef struct BcInstPtr
503 {
504 	/// The index of the currently executing function in the fns vector.
505 	size_t func;
506 
507 	/// The index into the bytecode vector of the *next* instruction.
508 	size_t idx;
509 
510 	/// The length of the results vector when this function started executing.
511 	/// This is mostly used for bc where functions should not affect the results
512 	/// of their callers.
513 	size_t len;
514 
515 } BcInstPtr;
516 
517 /// Types of identifiers.
518 typedef enum BcType
519 {
520 	/// Variable.
521 	BC_TYPE_VAR,
522 
523 	/// Array.
524 	BC_TYPE_ARRAY,
525 
526 #if BC_ENABLED
527 
528 	/// Array reference.
529 	BC_TYPE_REF,
530 
531 #endif // BC_ENABLED
532 
533 } BcType;
534 
535 #if BC_ENABLED
536 /// An auto variable in bc.
537 typedef struct BcAuto
538 {
539 	/// The index of the variable in the vars or arrs vectors.
540 	size_t idx;
541 
542 	/// The type of the variable.
543 	BcType type;
544 
545 } BcAuto;
546 #endif // BC_ENABLED
547 
548 /// Forward declaration.
549 struct BcProgram;
550 
551 /**
552  * Initializes a function.
553  * @param f     The function to initialize.
554  * @param name  The name of the function. The string is assumed to be owned by
555  *              some other entity.
556  */
557 void
558 bc_func_init(BcFunc* f, const char* name);
559 
560 /**
561  * Inserts an auto into the function.
562  * @param f     The function to insert into.
563  * @param p     The program. This is to search for the variable or array name.
564  * @param name  The name of the auto to insert.
565  * @param type  The type of the auto.
566  * @param line  The line in the source code where the insert happened. This is
567  *              solely for error reporting.
568  */
569 void
570 bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type,
571                size_t line);
572 
573 /**
574  * Resets a function in preparation for it to be reused. This can happen in bc
575  * because it is a dynamic language and functions can be redefined.
576  * @param f  The functio to reset.
577  */
578 void
579 bc_func_reset(BcFunc* f);
580 
581 #if BC_DEBUG
582 /**
583  * Frees a function. This is a destructor. This is only used in debug builds
584  * because all functions are freed at exit. We free them in debug builds to
585  * check for memory leaks.
586  * @param func  The function to free as a void pointer.
587  */
588 void
589 bc_func_free(void* func);
590 #endif // BC_DEBUG
591 
592 /**
593  * Initializes an array, which is the array type in bc and dc source code. Since
594  * variables and arrays are both arrays (see the development manual,
595  * manuals/development.md#execution, for more information), the @a nums
596  * parameter tells bc whether to initialize an array of numbers or an array of
597  * arrays of numbers. If the latter, it does a recursive call with nums set to
598  * true.
599  * @param a     The array to initialize.
600  * @param nums  True if the array should be for numbers, false if it should be
601  *              for vectors.
602  */
603 void
604 bc_array_init(BcVec* a, bool nums);
605 
606 /**
607  * Copies an array to another array. This is used to do pass arrays to functions
608  * that do not take references to arrays. The arrays are passed entirely by
609  * value, which means that they need to be copied.
610  * @param d  The destination array.
611  * @param s  The source array.
612  */
613 void
614 bc_array_copy(BcVec* d, const BcVec* s);
615 
616 /**
617  * Frees a string stored in a function. This is a destructor.
618  * @param string  The string to free as a void pointer.
619  */
620 void
621 bc_string_free(void* string);
622 
623 /**
624  * Frees a constant stored in a function. This is a destructor.
625  * @param constant  The constant to free as a void pointer.
626  */
627 void
628 bc_const_free(void* constant);
629 
630 /**
631  * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
632  * clearing the BcNum in the union. This is to ensure that bc does not use
633  * uninitialized data.
634  * @param r  The result to clear.
635  */
636 void
637 bc_result_clear(BcResult* r);
638 
639 /**
640  * Copies a result into another. This is done for things like duplicating the
641  * top of the results stack or copying the result of an assignment to put back
642  * on the results stack.
643  * @param d    The destination result.
644  * @param src  The source result.
645  */
646 void
647 bc_result_copy(BcResult* d, BcResult* src);
648 
649 /**
650  * Frees a result. This is a destructor.
651  * @param result  The result to free as a void pointer.
652  */
653 void
654 bc_result_free(void* result);
655 
656 /**
657  * Expands an array to @a len. This can happen because in bc, you do not have to
658  * explicitly initialize elements of an array. If you access an element that is
659  * not initialized, the array is expanded to fit it, and all missing elements
660  * are initialized to 0 if they are numbers, or arrays with one element of 0.
661  * This function does that expansion.
662  * @param a    The array to expand.
663  * @param len  The length to expand to.
664  */
665 void
666 bc_array_expand(BcVec* a, size_t len);
667 
668 #if BC_ENABLED
669 
670 /**
671  * Returns non-zero if the bytecode instruction i is an assignment instruction.
672  * @param i  The instruction to test.
673  * @return   Non-zero if i is an assignment instruction, zero otherwise.
674  */
675 #define BC_INST_IS_ASSIGN(i) \
676 	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)
677 
678 /**
679  * Returns true if the bytecode instruction @a i requires the value to be
680  * returned for use.
681  * @param i  The instruction to test.
682  * @return   True if @a i requires the value to be returned for use, false
683  *           otherwise.
684  */
685 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)
686 
687 #else // BC_ENABLED
688 
689 /**
690  * Returns non-zero if the bytecode instruction i is an assignment instruction.
691  * @param i  The instruction to test.
692  * @return   Non-zero if i is an assignment instruction, zero otherwise.
693  */
694 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)
695 
696 /**
697  * Returns true if the bytecode instruction @a i requires the value to be
698  * returned for use.
699  * @param i  The instruction to test.
700  * @return   True if @a i requires the value to be returned for use, false
701  *           otherwise.
702  */
703 #define BC_INST_USE_VAL(i) (false)
704 
705 #endif // BC_ENABLED
706 
707 #if BC_DEBUG_CODE
708 /// Reference to string names for all of the instructions. For debugging.
709 extern const char* bc_inst_names[];
710 #endif // BC_DEBUG_CODE
711 
712 /// References to the names of the main and read functions.
713 extern const char bc_func_main[];
714 extern const char bc_func_read[];
715 
716 #endif // BC_LANG_H
717