xref: /freebsd/contrib/bc/include/lang.h (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for program data.
33  *
34  */
35 
36 #ifndef BC_LANG_H
37 #define BC_LANG_H
38 
39 #include <stdbool.h>
40 #if BC_C11
41 #include <assert.h>
42 #endif // BC_C11
43 
44 #include <status.h>
45 #include <vector.h>
46 #include <num.h>
47 
48 /// The instructions for bytecode.
49 typedef enum BcInst {
50 
51 #if BC_ENABLED
52 
53 	/// Postfix increment and decrement. Prefix are translated into
54 	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
55 	BC_INST_INC = 0,
56 	BC_INST_DEC,
57 #endif // BC_ENABLED
58 
59 	/// Unary negation.
60 	BC_INST_NEG,
61 
62 	/// Boolean not.
63 	BC_INST_BOOL_NOT,
64 #if BC_ENABLE_EXTRA_MATH
65 	/// Truncation operator.
66 	BC_INST_TRUNC,
67 #endif // BC_ENABLE_EXTRA_MATH
68 
69 	/// These should be self-explanatory.
70 	BC_INST_POWER,
71 	BC_INST_MULTIPLY,
72 	BC_INST_DIVIDE,
73 	BC_INST_MODULUS,
74 	BC_INST_PLUS,
75 	BC_INST_MINUS,
76 
77 #if BC_ENABLE_EXTRA_MATH
78 
79 	/// Places operator.
80 	BC_INST_PLACES,
81 
82 	/// Shift operators.
83 	BC_INST_LSHIFT,
84 	BC_INST_RSHIFT,
85 #endif // BC_ENABLE_EXTRA_MATH
86 
87 	/// Comparison operators.
88 	BC_INST_REL_EQ,
89 	BC_INST_REL_LE,
90 	BC_INST_REL_GE,
91 	BC_INST_REL_NE,
92 	BC_INST_REL_LT,
93 	BC_INST_REL_GT,
94 
95 	/// Boolean or and and.
96 	BC_INST_BOOL_OR,
97 	BC_INST_BOOL_AND,
98 
99 #if BC_ENABLED
100 	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
101 	BC_INST_ASSIGN_POWER,
102 	BC_INST_ASSIGN_MULTIPLY,
103 	BC_INST_ASSIGN_DIVIDE,
104 	BC_INST_ASSIGN_MODULUS,
105 	BC_INST_ASSIGN_PLUS,
106 	BC_INST_ASSIGN_MINUS,
107 #if BC_ENABLE_EXTRA_MATH
108 	/// Places and shift assignment operators.
109 	BC_INST_ASSIGN_PLACES,
110 	BC_INST_ASSIGN_LSHIFT,
111 	BC_INST_ASSIGN_RSHIFT,
112 #endif // BC_ENABLE_EXTRA_MATH
113 
114 	/// Normal assignment.
115 	BC_INST_ASSIGN,
116 
117 	/// bc and dc detect when the value from an assignment is not necessary.
118 	/// For example, a plain assignment statement means the value is never used.
119 	/// In those cases, we can get lots of performance back by not even creating
120 	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
121 	/// a pop from the results stack, and a free. Definitely worth it to detect.
122 	BC_INST_ASSIGN_POWER_NO_VAL,
123 	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
124 	BC_INST_ASSIGN_DIVIDE_NO_VAL,
125 	BC_INST_ASSIGN_MODULUS_NO_VAL,
126 	BC_INST_ASSIGN_PLUS_NO_VAL,
127 	BC_INST_ASSIGN_MINUS_NO_VAL,
128 #if BC_ENABLE_EXTRA_MATH
129 	/// Same as above.
130 	BC_INST_ASSIGN_PLACES_NO_VAL,
131 	BC_INST_ASSIGN_LSHIFT_NO_VAL,
132 	BC_INST_ASSIGN_RSHIFT_NO_VAL,
133 #endif // BC_ENABLE_EXTRA_MATH
134 #endif // BC_ENABLED
135 
136 	/// Normal assignment that pushes no value on the stack.
137 	BC_INST_ASSIGN_NO_VAL,
138 
139 	/// Push a constant onto the results stack.
140 	BC_INST_NUM,
141 
142 	/// Push a variable onto the results stack.
143 	BC_INST_VAR,
144 
145 	/// Push an array element onto the results stack.
146 	BC_INST_ARRAY_ELEM,
147 
148 	/// Push an array onto the results stack. This is different from pushing an
149 	/// array *element* onto the results stack; it pushes a reference to the
150 	/// whole array. This is needed in bc for function arguments that are
151 	/// arrays. It is also needed for returning the length of an array.
152 	BC_INST_ARRAY,
153 
154 	/// Push a zero or a one onto the stack. These are special cased because it
155 	/// does help performance, particularly for one since inc/dec operators
156 	/// use it.
157 	BC_INST_ZERO,
158 	BC_INST_ONE,
159 
160 #if BC_ENABLED
161 	/// Push the last printed value onto the stack.
162 	BC_INST_LAST,
163 #endif // BC_ENABLED
164 
165 	/// Push the value of any of the globals onto the stack.
166 	BC_INST_IBASE,
167 	BC_INST_OBASE,
168 	BC_INST_SCALE,
169 
170 #if BC_ENABLE_EXTRA_MATH
171 	/// Push the value of the seed global onto the stack.
172 	BC_INST_SEED,
173 #endif // BC_ENABLE_EXTRA_MATH
174 
175 	/// These are builtin functions.
176 	BC_INST_LENGTH,
177 	BC_INST_SCALE_FUNC,
178 	BC_INST_SQRT,
179 	BC_INST_ABS,
180 
181 #if BC_ENABLE_EXTRA_MATH
182 	/// Another builtin function.
183 	BC_INST_IRAND,
184 #endif // BC_ENABLE_EXTRA_MATH
185 
186 	/// Asciify.
187 	BC_INST_ASCIIFY,
188 
189 	/// Another builtin function.
190 	BC_INST_READ,
191 
192 #if BC_ENABLE_EXTRA_MATH
193 	/// Another builtin function.
194 	BC_INST_RAND,
195 #endif // BC_ENABLE_EXTRA_MATH
196 
197 	/// Return the max for the various globals.
198 	BC_INST_MAXIBASE,
199 	BC_INST_MAXOBASE,
200 	BC_INST_MAXSCALE,
201 #if BC_ENABLE_EXTRA_MATH
202 	/// Return the max value returned by rand().
203 	BC_INST_MAXRAND,
204 #endif // BC_ENABLE_EXTRA_MATH
205 
206 	/// bc line_length() builtin function.
207 	BC_INST_LINE_LENGTH,
208 
209 #if BC_ENABLED
210 
211 	/// bc global_stacks() builtin function.
212 	BC_INST_GLOBAL_STACKS,
213 
214 #endif // BC_ENABLED
215 
216 	/// bc leading_zero() builtin function.
217 	BC_INST_LEADING_ZERO,
218 
219 	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
220 	/// dc uses this instruction to print, but not pop. That's valid in dc.
221 	/// However, in bc, it is *never* valid to print without popping. In bc,
222 	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
223 	/// because of a print statement or whether it should be printed raw. The
224 	/// reason for this is because a print statement handles escaped characters.
225 	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
226 	/// BC_INST_PRINT_STR is for printing a string by itself.
227 	///
228 	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
229 	///
230 	/// Oh, and BC_INST_STR pushes a string onto the results stack.
231 	BC_INST_PRINT,
232 	BC_INST_PRINT_POP,
233 	BC_INST_STR,
234 #if BC_ENABLED
235 	BC_INST_PRINT_STR,
236 
237 	/// Jumps unconditionally.
238 	BC_INST_JUMP,
239 
240 	/// Jumps if the top of the results stack is zero (condition failed). It
241 	/// turns out that we only want to jump when conditions fail to "skip" code.
242 	BC_INST_JUMP_ZERO,
243 
244 	/// Call a function.
245 	BC_INST_CALL,
246 
247 	/// Return the top of the stack to the caller.
248 	BC_INST_RET,
249 
250 	/// Return 0 to the caller.
251 	BC_INST_RET0,
252 
253 	/// Special return instruction for void functions.
254 	BC_INST_RET_VOID,
255 
256 	/// Special halt instruction.
257 	BC_INST_HALT,
258 #endif // BC_ENABLED
259 
260 	/// Pop an item off of the results stack.
261 	BC_INST_POP,
262 
263 	/// Swaps the top two items on the results stack.
264 	BC_INST_SWAP,
265 
266 	/// Modular exponentiation.
267 	BC_INST_MODEXP,
268 
269 	/// Do divide and modulus at the same time.
270 	BC_INST_DIVMOD,
271 
272 	/// Turns a number into a string and prints it.
273 	BC_INST_PRINT_STREAM,
274 
275 #if DC_ENABLED
276 
277 	/// dc's return; it pops an executing string off of the stack.
278 	BC_INST_POP_EXEC,
279 
280 	/// Unconditionally execute a string.
281 	BC_INST_EXECUTE,
282 
283 	/// Conditionally execute a string.
284 	BC_INST_EXEC_COND,
285 
286 	/// Prints each item on the results stack, separated by newlines.
287 	BC_INST_PRINT_STACK,
288 
289 	/// Pops everything off of the results stack.
290 	BC_INST_CLEAR_STACK,
291 
292 	/// Pushes the current length of a register stack onto the results stack.
293 	BC_INST_REG_STACK_LEN,
294 
295 	/// Pushes the current length of the results stack onto the results stack.
296 	BC_INST_STACK_LEN,
297 
298 	/// Pushes a copy of the item on the top of the results stack onto the
299 	/// results stack.
300 	BC_INST_DUPLICATE,
301 
302 	/// Copies the value in a register and pushes the copy onto the results
303 	/// stack.
304 	BC_INST_LOAD,
305 
306 	/// Pops an item off of a register stack and pushes it onto the results
307 	/// stack.
308 	BC_INST_PUSH_VAR,
309 
310 	/// Pops an item off of the results stack and pushes it onto a register's
311 	/// stack.
312 	BC_INST_PUSH_TO_VAR,
313 
314 	/// Quit.
315 	BC_INST_QUIT,
316 
317 	/// Quit executing some number of strings.
318 	BC_INST_NQUIT,
319 
320 	/// Push the depth of the execution stack onto the stack.
321 	BC_INST_EXEC_STACK_LEN,
322 
323 #endif // DC_ENABLED
324 
325 	/// Invalid instruction.
326 	BC_INST_INVALID,
327 
328 } BcInst;
329 
330 #if BC_C11
331 static_assert(BC_INST_INVALID <= UCHAR_MAX,
332               "Too many instructions to fit into an unsigned char");
333 #endif // BC_C11
334 
335 /// Used by maps to identify where items are in the array.
336 typedef struct BcId {
337 
338 	/// The name of the item.
339 	char *name;
340 
341 	/// The index into the array where the item is.
342 	size_t idx;
343 
344 } BcId;
345 
346 /// The location of a var, array, or array element.
347 typedef struct BcLoc {
348 
349 	/// The index of the var or array.
350 	size_t loc;
351 
352 	/// The index of the array element. Only used for array elements.
353 	size_t idx;
354 
355 } BcLoc;
356 
357 /// An entry for a constant.
358 typedef struct BcConst {
359 
360 	/// The original string as parsed from the source code.
361 	char *val;
362 
363 	/// The last base that the constant was parsed in.
364 	BcBigDig base;
365 
366 	/// The parsed constant.
367 	BcNum num;
368 
369 } BcConst;
370 
371 /// A function. This is also used in dc, not just bc. The reason is that strings
372 /// are executed in dc, and they are converted to functions in order to be
373 /// executed.
374 typedef struct BcFunc {
375 
376 	/// The bytecode instructions.
377 	BcVec code;
378 
379 #if BC_ENABLED
380 
381 	/// The labels. This is a vector of indices. The index is the index into
382 	/// the bytecode vector where the label is.
383 	BcVec labels;
384 
385 	/// The autos for the function. The first items are the parameters, and the
386 	/// arguments to the parameters must match the types in this vector.
387 	BcVec autos;
388 
389 	/// The number of parameters the function takes.
390 	size_t nparams;
391 
392 #endif // BC_ENABLED
393 
394 	/// The strings encountered in the function.
395 	BcVec strs;
396 
397 	/// The constants encountered in the function.
398 	BcVec consts;
399 
400 	/// The function's name.
401 	const char *name;
402 
403 #if BC_ENABLED
404 	/// True if the function is a void function.
405 	bool voidfn;
406 #endif // BC_ENABLED
407 
408 } BcFunc;
409 
410 /// Types of results that can be pushed onto the results stack.
411 typedef enum BcResultType {
412 
413 	/// Result is a variable.
414 	BC_RESULT_VAR,
415 
416 	/// Result is an array element.
417 	BC_RESULT_ARRAY_ELEM,
418 
419 	/// Result is an array. This is only allowed for function arguments or
420 	/// returning the length of the array.
421 	BC_RESULT_ARRAY,
422 
423 	/// Result is a string.
424 	BC_RESULT_STR,
425 
426 	/// Result is a temporary. This is used for the result of almost all
427 	/// expressions.
428 	BC_RESULT_TEMP,
429 
430 	/// Special casing the two below gave performance improvements.
431 
432 	/// Result is a 0.
433 	BC_RESULT_ZERO,
434 
435 	/// Result is a 1. Useful for inc/dec operators.
436 	BC_RESULT_ONE,
437 
438 #if BC_ENABLED
439 
440 	/// Result is the special "last" variable.
441 	BC_RESULT_LAST,
442 
443 	/// Result is the return value of a void function.
444 	BC_RESULT_VOID,
445 #endif // BC_ENABLED
446 
447 	/// Result is the value of ibase.
448 	BC_RESULT_IBASE,
449 
450 	/// Result is the value of obase.
451 	BC_RESULT_OBASE,
452 
453 	/// Result is the value of scale.
454 	BC_RESULT_SCALE,
455 
456 #if BC_ENABLE_EXTRA_MATH
457 
458 	/// Result is the value of seed.
459 	BC_RESULT_SEED,
460 
461 #endif // BC_ENABLE_EXTRA_MATH
462 
463 } BcResultType;
464 
465 /// A union to store data for various result types.
466 typedef union BcResultData {
467 
468 	/// A number. Strings are stored here too; they are numbers with
469 	/// cap == 0 && num == NULL. The string's index into the strings vector is
470 	/// stored in the scale field. But this is only used for strings stored in
471 	/// variables.
472 	BcNum n;
473 
474 	/// A vector.
475 	BcVec v;
476 
477 	/// A variable, array, or array element reference. This could also be a
478 	/// string if a string is not stored in a variable (dc only).
479 	BcLoc loc;
480 
481 } BcResultData;
482 
483 /// A tagged union for results.
484 typedef struct BcResult {
485 
486 	/// The tag. The type of the result.
487 	BcResultType t;
488 
489 	/// The data. The data for the result.
490 	BcResultData d;
491 
492 } BcResult;
493 
494 /// An instruction pointer. This is how bc knows where in the bytecode vector,
495 /// and which function, the current execution is.
496 typedef struct BcInstPtr {
497 
498 	/// The index of the currently executing function in the fns vector.
499 	size_t func;
500 
501 	/// The index into the bytecode vector of the *next* instruction.
502 	size_t idx;
503 
504 	/// The length of the results vector when this function started executing.
505 	/// This is mostly used for bc where functions should not affect the results
506 	/// of their callers.
507 	size_t len;
508 
509 } BcInstPtr;
510 
511 /// Types of identifiers.
512 typedef enum BcType {
513 
514 	/// Variable.
515 	BC_TYPE_VAR,
516 
517 	/// Array.
518 	BC_TYPE_ARRAY,
519 
520 #if BC_ENABLED
521 
522 	/// Array reference.
523 	BC_TYPE_REF,
524 
525 #endif // BC_ENABLED
526 
527 } BcType;
528 
529 #if BC_ENABLED
530 /// An auto variable in bc.
531 typedef struct BcAuto {
532 
533 	/// The index of the variable in the vars or arrs vectors.
534 	size_t idx;
535 
536 	/// The type of the variable.
537 	BcType type;
538 
539 } BcAuto;
540 #endif // BC_ENABLED
541 
542 /// Forward declaration.
543 struct BcProgram;
544 
545 /**
546  * Initializes a function.
547  * @param f     The function to initialize.
548  * @param name  The name of the function. The string is assumed to be owned by
549  *              some other entity.
550  */
551 void bc_func_init(BcFunc *f, const char* name);
552 
553 /**
554  * Inserts an auto into the function.
555  * @param f     The function to insert into.
556  * @param p     The program. This is to search for the variable or array name.
557  * @param name  The name of the auto to insert.
558  * @param type  The type of the auto.
559  * @param line  The line in the source code where the insert happened. This is
560  *              solely for error reporting.
561  */
562 void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name,
563                     BcType type, size_t line);
564 
565 /**
566  * Resets a function in preparation for it to be reused. This can happen in bc
567  * because it is a dynamic language and functions can be redefined.
568  * @param f  The functio to reset.
569  */
570 void bc_func_reset(BcFunc *f);
571 
572 #ifndef NDEBUG
573 /**
574  * Frees a function. This is a destructor. This is only used in debug builds
575  * because all functions are freed at exit. We free them in debug builds to
576  * check for memory leaks.
577  * @param func  The function to free as a void pointer.
578  */
579 void bc_func_free(void *func);
580 #endif // NDEBUG
581 
582 /**
583  * Initializes an array, which is the array type in bc and dc source code. Since
584  * variables and arrays are both arrays (see the development manual,
585  * manuals/development.md#execution, for more information), the @a nums
586  * parameter tells bc whether to initialize an array of numbers or an array of
587  * arrays of numbers. If the latter, it does a recursive call with nums set to
588  * true.
589  * @param a     The array to initialize.
590  * @param nums  True if the array should be for numbers, false if it should be
591  *              for vectors.
592  */
593 void bc_array_init(BcVec *a, bool nums);
594 
595 /**
596  * Copies an array to another array. This is used to do pass arrays to functions
597  * that do not take references to arrays. The arrays are passed entirely by
598  * value, which means that they need to be copied.
599  * @param d  The destination array.
600  * @param s  The source array.
601  */
602 void bc_array_copy(BcVec *d, const BcVec *s);
603 
604 /**
605  * Frees a string stored in a function. This is a destructor.
606  * @param string  The string to free as a void pointer.
607  */
608 void bc_string_free(void *string);
609 
610 /**
611  * Frees a constant stored in a function. This is a destructor.
612  * @param constant  The constant to free as a void pointer.
613  */
614 void bc_const_free(void *constant);
615 
616 /**
617  * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
618  * clearing the BcNum in the union. This is to ensure that bc does not use
619  * uninitialized data.
620  * @param r  The result to clear.
621  */
622 void bc_result_clear(BcResult *r);
623 
624 /**
625  * Copies a result into another. This is done for things like duplicating the
626  * top of the results stack or copying the result of an assignment to put back
627  * on the results stack.
628  * @param d    The destination result.
629  * @param src  The source result.
630  */
631 void bc_result_copy(BcResult *d, BcResult *src);
632 
633 /**
634  * Frees a result. This is a destructor.
635  * @param result  The result to free as a void pointer.
636  */
637 void bc_result_free(void *result);
638 
639 /**
640  * Expands an array to @a len. This can happen because in bc, you do not have to
641  * explicitly initialize elements of an array. If you access an element that is
642  * not initialized, the array is expanded to fit it, and all missing elements
643  * are initialized to 0 if they are numbers, or arrays with one element of 0.
644  * This function does that expansion.
645  * @param a    The array to expand.
646  * @param len  The length to expand to.
647  */
648 void bc_array_expand(BcVec *a, size_t len);
649 
650 /**
651  * Compare two BcId's and return the result. Since they are just comparing the
652  * names in the BcId, I return the result from strcmp() exactly. This is used by
653  * maps in their binary search.
654  * @param e1  The first id.
655  * @param e2  The second id.
656  * @return    The result of strcmp() on the BcId's names.
657  */
658 int bc_id_cmp(const BcId *e1, const BcId *e2);
659 
660 #if BC_ENABLED
661 
662 /**
663  * Returns non-zero if the bytecode instruction i is an assignment instruction.
664  * @param i  The instruction to test.
665  * @return   Non-zero if i is an assignment instruction, zero otherwise.
666  */
667 #define BC_INST_IS_ASSIGN(i) \
668 	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)
669 
670 /**
671  * Returns true if the bytecode instruction @a i requires the value to be
672  * returned for use.
673  * @param i  The instruction to test.
674  * @return   True if @a i requires the value to be returned for use, false
675  *           otherwise.
676  */
677 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)
678 
679 #else // BC_ENABLED
680 
681 /**
682  * Returns non-zero if the bytecode instruction i is an assignment instruction.
683  * @param i  The instruction to test.
684  * @return   Non-zero if i is an assignment instruction, zero otherwise.
685  */
686 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)
687 
688 /**
689  * Returns true if the bytecode instruction @a i requires the value to be
690  * returned for use.
691  * @param i  The instruction to test.
692  * @return   True if @a i requires the value to be returned for use, false
693  *           otherwise.
694  */
695 #define BC_INST_USE_VAL(i) (false)
696 
697 #endif // BC_ENABLED
698 
699 #if BC_DEBUG_CODE
700 /// Reference to string names for all of the instructions. For debugging.
701 extern const char* bc_inst_names[];
702 #endif // BC_DEBUG_CODE
703 
704 /// References to the names of the main and read functions.
705 extern const char bc_func_main[];
706 extern const char bc_func_read[];
707 
708 #endif // BC_LANG_H
709