xref: /freebsd/contrib/bc/include/lang.h (revision ae7e8a02e6e93455e026036132c4d053b2c12ad9)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for program data.
33  *
34  */
35 
36 #ifndef BC_LANG_H
37 #define BC_LANG_H
38 
39 #include <stdbool.h>
40 
41 #include <status.h>
42 #include <vector.h>
43 #include <num.h>
44 
45 /// The instructions for bytecode.
46 typedef enum BcInst {
47 
48 #if BC_ENABLED
49 
50 	/// Postfix increment and decrement. Prefix are translated into
51 	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
52 	BC_INST_INC = 0,
53 	BC_INST_DEC,
54 #endif // BC_ENABLED
55 
56 	/// Unary negation.
57 	BC_INST_NEG,
58 
59 	/// Boolean not.
60 	BC_INST_BOOL_NOT,
61 #if BC_ENABLE_EXTRA_MATH
62 	/// Truncation operator.
63 	BC_INST_TRUNC,
64 #endif // BC_ENABLE_EXTRA_MATH
65 
66 	/// These should be self-explanatory.
67 	BC_INST_POWER,
68 	BC_INST_MULTIPLY,
69 	BC_INST_DIVIDE,
70 	BC_INST_MODULUS,
71 	BC_INST_PLUS,
72 	BC_INST_MINUS,
73 
74 #if BC_ENABLE_EXTRA_MATH
75 
76 	/// Places operator.
77 	BC_INST_PLACES,
78 
79 	/// Shift operators.
80 	BC_INST_LSHIFT,
81 	BC_INST_RSHIFT,
82 #endif // BC_ENABLE_EXTRA_MATH
83 
84 	/// Comparison operators.
85 	BC_INST_REL_EQ,
86 	BC_INST_REL_LE,
87 	BC_INST_REL_GE,
88 	BC_INST_REL_NE,
89 	BC_INST_REL_LT,
90 	BC_INST_REL_GT,
91 
92 	/// Boolean or and and.
93 	BC_INST_BOOL_OR,
94 	BC_INST_BOOL_AND,
95 
96 #if BC_ENABLED
97 	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
98 	BC_INST_ASSIGN_POWER,
99 	BC_INST_ASSIGN_MULTIPLY,
100 	BC_INST_ASSIGN_DIVIDE,
101 	BC_INST_ASSIGN_MODULUS,
102 	BC_INST_ASSIGN_PLUS,
103 	BC_INST_ASSIGN_MINUS,
104 #if BC_ENABLE_EXTRA_MATH
105 	/// Places and shift assignment operators.
106 	BC_INST_ASSIGN_PLACES,
107 	BC_INST_ASSIGN_LSHIFT,
108 	BC_INST_ASSIGN_RSHIFT,
109 #endif // BC_ENABLE_EXTRA_MATH
110 
111 	/// Normal assignment.
112 	BC_INST_ASSIGN,
113 
114 	/// bc and dc detect when the value from an assignment is not necessary.
115 	/// For example, a plain assignment statement means the value is never used.
116 	/// In those cases, we can get lots of performance back by not even creating
117 	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
118 	/// a pop from the results stack, and a free. Definitely worth it to detect.
119 	BC_INST_ASSIGN_POWER_NO_VAL,
120 	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
121 	BC_INST_ASSIGN_DIVIDE_NO_VAL,
122 	BC_INST_ASSIGN_MODULUS_NO_VAL,
123 	BC_INST_ASSIGN_PLUS_NO_VAL,
124 	BC_INST_ASSIGN_MINUS_NO_VAL,
125 #if BC_ENABLE_EXTRA_MATH
126 	/// Same as above.
127 	BC_INST_ASSIGN_PLACES_NO_VAL,
128 	BC_INST_ASSIGN_LSHIFT_NO_VAL,
129 	BC_INST_ASSIGN_RSHIFT_NO_VAL,
130 #endif // BC_ENABLE_EXTRA_MATH
131 #endif // BC_ENABLED
132 
133 	/// Normal assignment that pushes no value on the stack.
134 	BC_INST_ASSIGN_NO_VAL,
135 
136 	/// Push a constant onto the results stack.
137 	BC_INST_NUM,
138 
139 	/// Push a variable onto the results stack.
140 	BC_INST_VAR,
141 
142 	/// Push an array element onto the results stack.
143 	BC_INST_ARRAY_ELEM,
144 
145 	/// Push an array onto the results stack. This is different from pushing an
146 	/// array *element* onto the results stack; it pushes a reference to the
147 	/// whole array. This is needed in bc for function arguments that are
148 	/// arrays. It is also needed for returning the length of an array.
149 	BC_INST_ARRAY,
150 
151 	/// Push a zero or a one onto the stack. These are special cased because it
152 	/// does help performance, particularly for one since inc/dec operators
153 	/// use it.
154 	BC_INST_ZERO,
155 	BC_INST_ONE,
156 
157 #if BC_ENABLED
158 	/// Push the last printed value onto the stack.
159 	BC_INST_LAST,
160 #endif // BC_ENABLED
161 
162 	/// Push the value of any of the globals onto the stack.
163 	BC_INST_IBASE,
164 	BC_INST_OBASE,
165 	BC_INST_SCALE,
166 
167 #if BC_ENABLE_EXTRA_MATH
168 	/// Push the value of the seed global onto the stack.
169 	BC_INST_SEED,
170 #endif // BC_ENABLE_EXTRA_MATH
171 
172 	/// These are builtin functions.
173 	BC_INST_LENGTH,
174 	BC_INST_SCALE_FUNC,
175 	BC_INST_SQRT,
176 	BC_INST_ABS,
177 
178 #if BC_ENABLE_EXTRA_MATH
179 	/// Another builtin function.
180 	BC_INST_IRAND,
181 #endif // BC_ENABLE_EXTRA_MATH
182 
183 	/// Asciify.
184 	BC_INST_ASCIIFY,
185 
186 	/// Another builtin function.
187 	BC_INST_READ,
188 
189 #if BC_ENABLE_EXTRA_MATH
190 	/// Another builtin function.
191 	BC_INST_RAND,
192 #endif // BC_ENABLE_EXTRA_MATH
193 
194 	/// Return the max for the various globals.
195 	BC_INST_MAXIBASE,
196 	BC_INST_MAXOBASE,
197 	BC_INST_MAXSCALE,
198 #if BC_ENABLE_EXTRA_MATH
199 	/// Return the max value returned by rand().
200 	BC_INST_MAXRAND,
201 #endif // BC_ENABLE_EXTRA_MATH
202 
203 	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
204 	/// dc uses this instruction to print, but not pop. That's valid in dc.
205 	/// However, in bc, it is *never* valid to print without popping. In bc,
206 	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
207 	/// because of a print statement or whether it should be printed raw. The
208 	/// reason for this is because a print statement handles escaped characters.
209 	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
210 	/// BC_INST_PRINT_STR is for printing a string by itself.
211 	///
212 	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
213 	///
214 	/// Oh, and BC_INST_STR pushes a string onto the results stack.
215 	BC_INST_PRINT,
216 	BC_INST_PRINT_POP,
217 	BC_INST_STR,
218 #if BC_ENABLED
219 	BC_INST_PRINT_STR,
220 
221 	/// Jumps unconditionally.
222 	BC_INST_JUMP,
223 
224 	/// Jumps if the top of the results stack is zero (condition failed). It
225 	/// turns out that we only want to jump when conditions fail to "skip" code.
226 	BC_INST_JUMP_ZERO,
227 
228 	/// Call a function.
229 	BC_INST_CALL,
230 
231 	/// Return the top of the stack to the caller.
232 	BC_INST_RET,
233 
234 	/// Return 0 to the caller.
235 	BC_INST_RET0,
236 
237 	/// Special return instruction for void functions.
238 	BC_INST_RET_VOID,
239 
240 	/// Special halt instruction.
241 	BC_INST_HALT,
242 #endif // BC_ENABLED
243 
244 	/// Pop an item off of the results stack.
245 	BC_INST_POP,
246 
247 	/// Swaps the top two items on the results stack.
248 	BC_INST_SWAP,
249 
250 	/// Modular exponentiation.
251 	BC_INST_MODEXP,
252 
253 	/// Do divide and modulus at the same time.
254 	BC_INST_DIVMOD,
255 
256 	/// Turns a number into a string and prints it.
257 	BC_INST_PRINT_STREAM,
258 
259 #if DC_ENABLED
260 
261 	/// dc's return; it pops an executing string off of the stack.
262 	BC_INST_POP_EXEC,
263 
264 	/// Unconditionally execute a string.
265 	BC_INST_EXECUTE,
266 
267 	/// Conditionally execute a string.
268 	BC_INST_EXEC_COND,
269 
270 	/// Prints each item on the results stack, separated by newlines.
271 	BC_INST_PRINT_STACK,
272 
273 	/// Pops everything off of the results stack.
274 	BC_INST_CLEAR_STACK,
275 
276 	/// Pushes the current length of a register stack onto the results stack.
277 	BC_INST_REG_STACK_LEN,
278 
279 	/// Pushes the current length of the results stack onto the results stack.
280 	BC_INST_STACK_LEN,
281 
282 	/// Pushes a copy of the item on the top of the results stack onto the
283 	/// results stack.
284 	BC_INST_DUPLICATE,
285 
286 	/// Copies the value in a register and pushes the copy onto the results
287 	/// stack.
288 	BC_INST_LOAD,
289 
290 	/// Pops an item off of a register stack and pushes it onto the results
291 	/// stack.
292 	BC_INST_PUSH_VAR,
293 
294 	/// Pops an item off of the results stack and pushes it onto a register's
295 	/// stack.
296 	BC_INST_PUSH_TO_VAR,
297 
298 	/// Quit.
299 	BC_INST_QUIT,
300 
301 	/// Quit executing some number of strings.
302 	BC_INST_NQUIT,
303 
304 	/// Push the depth of the execution stack onto the stack.
305 	BC_INST_EXEC_STACK_LEN,
306 
307 #endif // DC_ENABLED
308 
309 	/// Invalid instruction.
310 	BC_INST_INVALID,
311 
312 } BcInst;
313 
314 /// Used by maps to identify where items are in the array.
315 typedef struct BcId {
316 
317 	/// The name of the item.
318 	char *name;
319 
320 	/// The index into the array where the item is.
321 	size_t idx;
322 
323 } BcId;
324 
325 /// The location of a var, array, or array element.
326 typedef struct BcLoc {
327 
328 	/// The index of the var or array.
329 	size_t loc;
330 
331 	/// The index of the array element. Only used for array elements.
332 	size_t idx;
333 
334 } BcLoc;
335 
336 /// An entry for a constant.
337 typedef struct BcConst {
338 
339 	/// The original string as parsed from the source code.
340 	char *val;
341 
342 	/// The last base that the constant was parsed in.
343 	BcBigDig base;
344 
345 	/// The parsed constant.
346 	BcNum num;
347 
348 } BcConst;
349 
350 /// A function. This is also used in dc, not just bc. The reason is that strings
351 /// are executed in dc, and they are converted to functions in order to be
352 /// executed.
353 typedef struct BcFunc {
354 
355 	/// The bytecode instructions.
356 	BcVec code;
357 
358 #if BC_ENABLED
359 
360 	/// The labels. This is a vector of indices. The index is the index into
361 	/// the bytecode vector where the label is.
362 	BcVec labels;
363 
364 	/// The autos for the function. The first items are the parameters, and the
365 	/// arguments to the parameters must match the types in this vector.
366 	BcVec autos;
367 
368 	/// The number of parameters the function takes.
369 	size_t nparams;
370 
371 #endif // BC_ENABLED
372 
373 	/// The strings encountered in the function.
374 	BcVec strs;
375 
376 	/// The constants encountered in the function.
377 	BcVec consts;
378 
379 	/// The function's name.
380 	const char *name;
381 
382 #if BC_ENABLED
383 	/// True if the function is a void function.
384 	bool voidfn;
385 #endif // BC_ENABLED
386 
387 } BcFunc;
388 
389 /// Types of results that can be pushed onto the results stack.
390 typedef enum BcResultType {
391 
392 	/// Result is a variable.
393 	BC_RESULT_VAR,
394 
395 	/// Result is an array element.
396 	BC_RESULT_ARRAY_ELEM,
397 
398 	/// Result is an array. This is only allowed for function arguments or
399 	/// returning the length of the array.
400 	BC_RESULT_ARRAY,
401 
402 	/// Result is a string.
403 	BC_RESULT_STR,
404 
405 	/// Result is a temporary. This is used for the result of almost all
406 	/// expressions.
407 	BC_RESULT_TEMP,
408 
409 	/// Special casing the two below gave performance improvements.
410 
411 	/// Result is a 0.
412 	BC_RESULT_ZERO,
413 
414 	/// Result is a 1. Useful for inc/dec operators.
415 	BC_RESULT_ONE,
416 
417 #if BC_ENABLED
418 
419 	/// Result is the special "last" variable.
420 	BC_RESULT_LAST,
421 
422 	/// Result is the return value of a void function.
423 	BC_RESULT_VOID,
424 #endif // BC_ENABLED
425 
426 	/// Result is the value of ibase.
427 	BC_RESULT_IBASE,
428 
429 	/// Result is the value of obase.
430 	BC_RESULT_OBASE,
431 
432 	/// Result is the value of scale.
433 	BC_RESULT_SCALE,
434 
435 #if BC_ENABLE_EXTRA_MATH
436 
437 	/// Result is the value of seed.
438 	BC_RESULT_SEED,
439 
440 #endif // BC_ENABLE_EXTRA_MATH
441 
442 } BcResultType;
443 
444 /// A union to store data for various result types.
445 typedef union BcResultData {
446 
447 	/// A number. Strings are stored here too; they are numbers with
448 	/// cap == 0 && num == NULL. The string's index into the strings vector is
449 	/// stored in the scale field. But this is only used for strings stored in
450 	/// variables.
451 	BcNum n;
452 
453 	/// A vector.
454 	BcVec v;
455 
456 	/// A variable, array, or array element reference. This could also be a
457 	/// string if a string is not stored in a variable (dc only).
458 	BcLoc loc;
459 
460 } BcResultData;
461 
462 /// A tagged union for results.
463 typedef struct BcResult {
464 
465 	/// The tag. The type of the result.
466 	BcResultType t;
467 
468 	/// The data. The data for the result.
469 	BcResultData d;
470 
471 } BcResult;
472 
473 /// An instruction pointer. This is how bc knows where in the bytecode vector,
474 /// and which function, the current execution is.
475 typedef struct BcInstPtr {
476 
477 	/// The index of the currently executing function in the fns vector.
478 	size_t func;
479 
480 	/// The index into the bytecode vector of the *next* instruction.
481 	size_t idx;
482 
483 	/// The length of the results vector when this function started executing.
484 	/// This is mostly used for bc where functions should not affect the results
485 	/// of their callers.
486 	size_t len;
487 
488 } BcInstPtr;
489 
490 /// Types of identifiers.
491 typedef enum BcType {
492 
493 	/// Variable.
494 	BC_TYPE_VAR,
495 
496 	/// Array.
497 	BC_TYPE_ARRAY,
498 
499 #if BC_ENABLED
500 
501 	/// Array reference.
502 	BC_TYPE_REF,
503 
504 #endif // BC_ENABLED
505 
506 } BcType;
507 
508 #if BC_ENABLED
509 /// An auto variable in bc.
510 typedef struct BcAuto {
511 
512 	/// The index of the variable in the vars or arrs vectors.
513 	size_t idx;
514 
515 	/// The type of the variable.
516 	BcType type;
517 
518 } BcAuto;
519 #endif // BC_ENABLED
520 
521 /// Forward declaration.
522 struct BcProgram;
523 
524 /**
525  * Initializes a function.
526  * @param f     The function to initialize.
527  * @param name  The name of the function. The string is assumed to be owned by
528  *              some other entity.
529  */
530 void bc_func_init(BcFunc *f, const char* name);
531 
532 /**
533  * Inserts an auto into the function.
534  * @param f     The function to insert into.
535  * @param p     The program. This is to search for the variable or array name.
536  * @param name  The name of the auto to insert.
537  * @param type  The type of the auto.
538  * @param line  The line in the source code where the insert happened. This is
539  *              solely for error reporting.
540  */
541 void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name,
542                     BcType type, size_t line);
543 
544 /**
545  * Resets a function in preparation for it to be reused. This can happen in bc
546  * because it is a dynamic language and functions can be redefined.
547  * @param f  The functio to reset.
548  */
549 void bc_func_reset(BcFunc *f);
550 
551 #ifndef NDEBUG
552 /**
553  * Frees a function. This is a destructor. This is only used in debug builds
554  * because all functions are freed at exit. We free them in debug builds to
555  * check for memory leaks.
556  * @param func  The function to free as a void pointer.
557  */
558 void bc_func_free(void *func);
559 #endif // NDEBUG
560 
561 /**
562  * Initializes an array, which is the array type in bc and dc source code. Since
563  * variables and arrays are both arrays (see the development manual,
564  * manuals/development.md#execution, for more information), the @a nums
565  * parameter tells bc whether to initialize an array of numbers or an array of
566  * arrays of numbers. If the latter, it does a recursive call with nums set to
567  * true.
568  * @param a     The array to initialize.
569  * @param nums  True if the array should be for numbers, false if it should be
570  *              for vectors.
571  */
572 void bc_array_init(BcVec *a, bool nums);
573 
574 /**
575  * Copies an array to another array. This is used to do pass arrays to functions
576  * that do not take references to arrays. The arrays are passed entirely by
577  * value, which means that they need to be copied.
578  * @param d  The destination array.
579  * @param s  The source array.
580  */
581 void bc_array_copy(BcVec *d, const BcVec *s);
582 
583 /**
584  * Frees a string stored in a function. This is a destructor.
585  * @param string  The string to free as a void pointer.
586  */
587 void bc_string_free(void *string);
588 
589 /**
590  * Frees a constant stored in a function. This is a destructor.
591  * @param constant  The constant to free as a void pointer.
592  */
593 void bc_const_free(void *constant);
594 
595 /**
596  * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
597  * clearing the BcNum in the union. This is to ensure that bc does not use
598  * uninitialized data.
599  * @param r  The result to clear.
600  */
601 void bc_result_clear(BcResult *r);
602 
603 /**
604  * Copies a result into another. This is done for things like duplicating the
605  * top of the results stack or copying the result of an assignment to put back
606  * on the results stack.
607  * @param d    The destination result.
608  * @param src  The source result.
609  */
610 void bc_result_copy(BcResult *d, BcResult *src);
611 
612 /**
613  * Frees a result. This is a destructor.
614  * @param result  The result to free as a void pointer.
615  */
616 void bc_result_free(void *result);
617 
618 /**
619  * Expands an array to @a len. This can happen because in bc, you do not have to
620  * explicitly initialize elements of an array. If you access an element that is
621  * not initialized, the array is expanded to fit it, and all missing elements
622  * are initialized to 0 if they are numbers, or arrays with one element of 0.
623  * This function does that expansion.
624  * @param a    The array to expand.
625  * @param len  The length to expand to.
626  */
627 void bc_array_expand(BcVec *a, size_t len);
628 
629 /**
630  * Compare two BcId's and return the result. Since they are just comparing the
631  * names in the BcId, I return the result from strcmp() exactly. This is used by
632  * maps in their binary search.
633  * @param e1  The first id.
634  * @param e2  The second id.
635  * @return    The result of strcmp() on the BcId's names.
636  */
637 int bc_id_cmp(const BcId *e1, const BcId *e2);
638 
639 #if BC_ENABLED
640 
641 /**
642  * Returns non-zero if the bytecode instruction i is an assignment instruction.
643  * @param i  The instruction to test.
644  * @return   Non-zero if i is an assignment instruction, zero otherwise.
645  */
646 #define BC_INST_IS_ASSIGN(i) \
647 	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)
648 
649 /**
650  * Returns true if the bytecode instruction @a i requires the value to be
651  * returned for use.
652  * @param i  The instruction to test.
653  * @return   True if @a i requires the value to be returned for use, false
654  *           otherwise.
655  */
656 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)
657 
658 #else // BC_ENABLED
659 
660 /**
661  * Returns non-zero if the bytecode instruction i is an assignment instruction.
662  * @param i  The instruction to test.
663  * @return   Non-zero if i is an assignment instruction, zero otherwise.
664  */
665 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)
666 
667 /**
668  * Returns true if the bytecode instruction @a i requires the value to be
669  * returned for use.
670  * @param i  The instruction to test.
671  * @return   True if @a i requires the value to be returned for use, false
672  *           otherwise.
673  */
674 #define BC_INST_USE_VAL(i) (false)
675 
676 #endif // BC_ENABLED
677 
678 #if BC_DEBUG_CODE
679 /// Reference to string names for all of the instructions. For debugging.
680 extern const char* bc_inst_names[];
681 #endif // BC_DEBUG_CODE
682 
683 /// References to the names of the main and read functions.
684 extern const char bc_func_main[];
685 extern const char bc_func_read[];
686 
687 #endif // BC_LANG_H
688