xref: /freebsd/contrib/bc/include/bc.h (revision e9ac41698b2f322d55ccf9da50a3596edb2c1800)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2024 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for bc only.
33  *
34  */
35 
36 #ifndef BC_BC_H
37 #define BC_BC_H
38 
39 #if BC_ENABLED
40 
41 #include <limits.h>
42 #include <stdbool.h>
43 
44 #include <status.h>
45 #include <lex.h>
46 #include <parse.h>
47 
/**
 * The main function for bc. It just sets variables and passes its arguments
 * through to @a bc_vm_boot().
 * @param argc  The number of entries in @a argv.
 * @param argv  The argument vector.
 * @return      A status.
 */
BcStatus
bc_main(int argc, char* argv[]);
55 
// These are references to the help text, the library text, and the "filename"
// for the library. They are only declared here, not defined.
extern const char bc_help[];
extern const char bc_lib[];
extern const char* bc_lib_name;

// These are references to the second math library and its "filename." They
// are only declared when BC_ENABLE_EXTRA_MATH is non-zero.
#if BC_ENABLE_EXTRA_MATH
extern const char bc_lib2[];
extern const char* bc_lib2_name;
#endif // BC_ENABLE_EXTRA_MATH
67 
/**
 * A struct containing information about a bc keyword.
 */
typedef struct BcLexKeyword
{
	/// Holds the length of the keyword along with a bit that, if set, means the
	/// keyword is used in POSIX bc. The POSIX bit is the most significant bit
	/// (see BC_LEX_KW_POSIX()); the remaining bits are the length (see
	/// BC_LEX_KW_LEN()).
	uchar data;

	/// The keyword text, stored in a fixed array of 14 chars.
	const char name[14];
} BcLexKeyword;
80 
/// Shifts @a v into the most significant bit of a char-sized value. Used to
/// build and test the POSIX bit in BcLexKeyword's data field.
#define BC_LEX_CHAR_MSB(v) ((v) << (CHAR_BIT - 1))

/// Evaluates to non-zero if the keyword pointed to by @a k has its POSIX bit
/// set, and to zero otherwise.
#define BC_LEX_KW_POSIX(k) (BC_LEX_CHAR_MSB(1) & (k)->data)

/// Evaluates to the length of the keyword pointed to by @a k, as a size_t.
/// This is the data field with the POSIX bit masked off.
#define BC_LEX_KW_LEN(k) ((size_t) (~BC_LEX_CHAR_MSB(1) & (k)->data))

/// Builds a BcLexKeyword initializer from the keyword text, its length, and
/// whether or not it is a POSIX keyword. See bc_lex_kws in src/data.c.
#define BC_LEX_KW_ENTRY(kw_text, kw_len, is_posix)                            \
	{                                                                         \
		.data = BC_LEX_CHAR_MSB(is_posix) | (~BC_LEX_CHAR_MSB(1) & (kw_len)), \
		.name = kw_text                                                       \
	}
94 
/// A macro for the number of keywords bc has; builds with the extra math
/// features have more keywords than builds without. This has to be updated if
/// any keywords are added. This is for the redefined_kws field of the BcVm
/// struct.
#if !BC_ENABLE_EXTRA_MATH

#define BC_LEX_NKWS (33)

#else // !BC_ENABLE_EXTRA_MATH

#define BC_LEX_NKWS (37)

#endif // !BC_ENABLE_EXTRA_MATH
108 
// The array of keywords and its length. Both are only declared here; entries
// of the array can be built with BC_LEX_KW_ENTRY().
extern const BcLexKeyword bc_lex_kws[];
extern const size_t bc_lex_kws_len;
112 
/**
 * The @a BcLexNext function for bc. (See include/lex.h for a definition of
 * @a BcLexNext.) It has no return value.
 * @param l  The lexer.
 */
void
bc_lex_token(BcLex* l);
120 
121 // The following section is for flags needed when parsing bc code. These flags
122 // are complicated, but necessary. Why you ask? Because bc's standard is awful.
123 //
124 // If you don't believe me, go read the bc Parsing section of the Development
125 // manual (manuals/development.md). Then come back.
126 //
127 // In other words, these flags are the sign declaring, "Here be dragons."
128 
/**
 * Evaluates to a pointer to the flag set that sits on top of the parser's
 * flag stack. @a p must be a BcParse pointer.
 * @param p  The parser.
 * @return   A uint16_t pointer to the top flag set.
 */
#define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t*) bc_vec_top(&(p)->flags))

/**
 * Evaluates to the flag set that sits on top of the parser's flag stack.
 * @a p must be a BcParse pointer. Because this is a dereference, the result
 * can also be assigned to.
 * @param p  The parser.
 * @return   The top flag set.
 */
#define BC_PARSE_TOP_FLAG(p) (*((uint16_t*) bc_vec_top(&(p)->flags)))
144 
// From here on, flag #defines come in pairs: the first defines the flag
// itself, and the second tests for that flag in the flag set at the top of the
// flag stack. Every `p` argument is a pointer to a BcParse.

// Helper: the flag that occupies bit position n of a flag set.
#define BC_PARSE_FLAG_BIT(n) (UINTMAX_C(1) << (n))

// Helper: non-zero if flag f is set in the top flag set of parser p.
#define BC_PARSE_FLAG_ISSET(p, f) (BC_PARSE_TOP_FLAG(p) & (f))

// Set when the parser has seen a left brace.
#define BC_PARSE_FLAG_BRACE BC_PARSE_FLAG_BIT(0)
#define BC_PARSE_BRACE(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_BRACE)

// Set when the parser is inside of the braces of a function body.
#define BC_PARSE_FLAG_FUNC_INNER BC_PARSE_FLAG_BIT(1)
#define BC_PARSE_FUNC_INNER(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_FUNC_INNER)

// Set when the parser is parsing a function. Unlike the flag above, it is set
// while parsing the function header *or* the function body, not only the body.
#define BC_PARSE_FLAG_FUNC BC_PARSE_FLAG_BIT(2)
#define BC_PARSE_FUNC(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_FUNC)

// Set when the parser expects to parse a body next, whether it is the body of
// a function, an if statement, or a loop.
#define BC_PARSE_FLAG_BODY BC_PARSE_FLAG_BIT(3)
#define BC_PARSE_BODY(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_BODY)

// Set when bc is parsing a loop. This matters because the break and continue
// keywords are only valid inside of a loop.
#define BC_PARSE_FLAG_LOOP BC_PARSE_FLAG_BIT(4)
#define BC_PARSE_LOOP(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_LOOP)

// Set when bc is parsing the body of a loop. It relates to the flag above the
// same way @a BC_PARSE_FLAG_FUNC_INNER relates to @a BC_PARSE_FLAG_FUNC.
#define BC_PARSE_FLAG_LOOP_INNER BC_PARSE_FLAG_BIT(5)
#define BC_PARSE_LOOP_INNER(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_LOOP_INNER)

// Set when bc is parsing an if statement.
#define BC_PARSE_FLAG_IF BC_PARSE_FLAG_BIT(6)
#define BC_PARSE_IF(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_IF)

// Set when bc is parsing an else statement. This matters because of "else if"
// constructions, among other things.
#define BC_PARSE_FLAG_ELSE BC_PARSE_FLAG_BIT(7)
#define BC_PARSE_ELSE(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_ELSE)

// Set when bc has just finished parsing an if statement and its body. It tells
// the parser that an else statement may well come next, which makes this one
// of the most subtle flags.
#define BC_PARSE_FLAG_IF_END BC_PARSE_FLAG_BIT(8)
#define BC_PARSE_IF_END(p) BC_PARSE_FLAG_ISSET(p, BC_PARSE_FLAG_IF_END)
194 
/**
 * Evaluates to true if bc is in a state where it should not execute any code
 * at all. Execution is only allowed when the flag stack holds exactly one flag
 * set and that flag set is empty.
 * @param p  The parser.
 * @return   True if execution cannot proceed, false otherwise.
 */
#define BC_PARSE_NO_EXEC(p) \
	(!((p)->flags.len == 1 && BC_PARSE_TOP_FLAG(p) == 0))
202 
/**
 * Evaluates to true if the token @a t is a statement delimiter. A semicolon
 * and a newline are delimiters, and so is the end of the input.
 * @param t  The token to check.
 * @return   True if t is a statement delimiter token; false otherwise.
 */
#define BC_PARSE_DELIMITER(t)   \
	(BC_LEX_SCOLON == (t) ||    \
	 BC_LEX_NLINE == (t) ||     \
	 BC_LEX_EOF == (t))
211 
/**
 * The name is not great, but this basically tells whether or not the current
 * state is valid for the end of an else statement. Concretely, it is non-zero
 * when the else flag or the inner-loop flag is set in @a f.
 * @param f  The flag set to be checked.
 * @return   True if the state is valid for the end of an else statement.
 */
#define BC_PARSE_BLOCK_STMT(f) \
	((BC_PARSE_FLAG_LOOP_INNER | BC_PARSE_FLAG_ELSE) & (f))
220 
/**
 * Builds the data for an operator from its precedence and its associativity.
 * The associativity goes in the most significant bit and the precedence in
 * the remaining bits. This is used to construct the array of operators,
 * bc_parse_ops, in src/data.c.
 * @param prec  The precedence.
 * @param left  True if the operator is left associative, false otherwise.
 * @return      The data for the operator.
 */
#define BC_PARSE_OP(prec, left) \
	(BC_LEX_CHAR_MSB(left) | (~BC_LEX_CHAR_MSB(1) & (prec)))

/**
 * Looks up the operator data for the lex token @a tok. The table is indexed
 * relative to BC_LEX_OP_INC.
 * @param tok  The token to return operator data for.
 * @return     The operator data for @a tok.
 */
#define BC_PARSE_OP_DATA(tok) (bc_parse_ops[(tok) - BC_LEX_OP_INC])

/**
 * Tests the associativity bit of operator @a tok.
 * @param tok  The operator to test for associativity.
 * @return     Non-zero if the operator is left associative, zero otherwise.
 */
#define BC_PARSE_OP_LEFT(tok) (BC_LEX_CHAR_MSB(1) & BC_PARSE_OP_DATA(tok))

/**
 * Extracts the precedence of operator @a tok. A lower number means a higher
 * precedence.
 * @param tok  The operator to return the precedence of.
 * @return     The precedence of @a tok.
 */
#define BC_PARSE_OP_PREC(tok) (~BC_LEX_CHAR_MSB(1) & BC_PARSE_OP_DATA(tok))
252 
/// Helper for @a BC_PARSE_EXPR_ENTRY: puts expression bit @a e at bit position
/// @a pos.
#define BC_PARSE_EXPR_BIT_AT(e, pos) (UINTMAX_C(e) << (pos))

/**
 * Packs 8 expression bits, one for each bit of a uint8_t, into one value. Each
 * bit says whether the corresponding lex token is an expression token or not.
 * The first argument lands in the most significant bit and the last argument
 * in the least significant bit. See bc_parse_exprs in src/data.c for uses.
 * @param e1  The first bit.
 * @param e2  The second bit.
 * @param e3  The third bit.
 * @param e4  The fourth bit.
 * @param e5  The fifth bit.
 * @param e6  The sixth bit.
 * @param e7  The seventh bit.
 * @param e8  The eighth bit.
 * @return    An expression entry for bc_parse_exprs[].
 */
#define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8)       \
	(BC_PARSE_EXPR_BIT_AT(e1, 7) | BC_PARSE_EXPR_BIT_AT(e2, 6) |  \
	 BC_PARSE_EXPR_BIT_AT(e3, 5) | BC_PARSE_EXPR_BIT_AT(e4, 4) |  \
	 BC_PARSE_EXPR_BIT_AT(e5, 3) | BC_PARSE_EXPR_BIT_AT(e6, 2) |  \
	 BC_PARSE_EXPR_BIT_AT(e7, 1) | BC_PARSE_EXPR_BIT_AT(e8, 0))
271 
/// Helper for @a BC_PARSE_EXPR: the index of the byte in bc_parse_exprs that
/// holds the bit for token @a i.
#define BC_PARSE_EXPR_BYTE(i) (((i) & (uchar) ~(0x07)) >> 3)

/// Helper for @a BC_PARSE_EXPR: the mask that selects the bit for token @a i
/// inside of its byte. Bits are counted from the most significant bit down.
#define BC_PARSE_EXPR_MASK(i) (1 << (7 - ((i) & 0x07)))

/**
 * Tests whether token @a i is a token that belongs in an expression by
 * looking up its bit in bc_parse_exprs.
 * @param i  The token to test.
 * @return   True if i is an expression token, false otherwise.
 */
#define BC_PARSE_EXPR(i) \
	(bc_parse_exprs[BC_PARSE_EXPR_BYTE(i)] & BC_PARSE_EXPR_MASK(i))
279 
/**
 * Evaluates to the operator, as a lex token, that sits on top of the parser's
 * operator stack.
 * @param p  The parser.
 * @return   The operator that is at the top of the operator stack, as a lex
 *           token.
 */
#define BC_PARSE_TOP_OP(p) (((BcLexType*) bc_vec_top(&(p)->ops))[0])
288 
/**
 * Tells whether bc has a "leaf" token. A "leaf" token is one that can stand
 * alone in an expression. A number by itself, for example, can be an
 * expression. A binary operator, on the other hand, is valid in an expression
 * but cannot be the whole expression because it needs an expression on its
 * left and on its right. See the documentation for @a bc_parse_expr_err() in
 * src/bc_parse.c.
 * @param prev      The previous token as an instruction.
 * @param bin_last  True if that last operator was a binary operator, false
 *                  otherwise.
 * @param rparen    True if the last operator was a right paren.
 * @return          True if the last token was a leaf token, false otherwise.
 */
#define BC_PARSE_LEAF(prev, bin_last, rparen) \
	((bin_last) ? 0 : ((rparen) || bc_parse_inst_isLeaf(prev)))
303 
// The last instruction in the range that is treated as a variable. Which one
// it is depends on whether bc is built with the extra math features.
#if BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR_LAST BC_INST_SEED
#else // BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR_LAST BC_INST_SCALE
#endif // BC_ENABLE_EXTRA_MATH

/**
 * Tells whether the token @a t should be treated as though it's a variable.
 * That is the case for actual variables, array elements, and globals. Whole
 * arrays lie inside of the tested range, so they are excluded explicitly.
 * @param t  The token to test.
 * @return   True if @a t should be treated as though it's a variable, false
 *           otherwise.
 */
#define BC_PARSE_INST_VAR(t)                                   \
	(BC_INST_VAR <= (t) && (t) <= BC_PARSE_INST_VAR_LAST &&    \
	 BC_INST_ARRAY != (t))
318 
/**
 * Tells whether the previous token @a p, given as a bytecode instruction, is
 * a prefix operator. Taking a bytecode instruction instead of a lex token is
 * what sets it apart from @a BC_PARSE_OP_PREFIX below.
 * @param p  The previous token.
 * @return   True if @a p is a prefix operator.
 */
#define BC_PARSE_PREV_PREFIX(p) (BC_INST_NEG <= (p) && (p) <= BC_INST_BOOL_NOT)

/**
 * Tells whether the lex token @a t is a prefix operator, which means either
 * boolean not or negation.
 * @param t  The token to test.
 * @return   True if @a t is a prefix operator, false otherwise.
 */
#define BC_PARSE_OP_PREFIX(t) (BC_LEX_OP_BOOL_NOT == (t) || BC_LEX_NEG == (t))
334 
/**
 * Converts a lex token into a bytecode instruction. The conversion is a
 * matter of subtracting the position of the first operator in the lex enum
 * and adding the position of the first in the instruction enum. Note: This
 * only works for binary operators.
 * @param t  The token to turn into an instruction.
 * @return   The token as an instruction, as a uchar.
 */
#define BC_PARSE_TOKEN_INST(t) ((uchar) (BC_INST_NEG + ((t) - BC_LEX_NEG)))
344 
/**
 * Tells whether the token is a bc keyword, which is the case when it lies in
 * the range from BC_LEX_KW_AUTO to BC_LEX_KW_ELSE, inclusive.
 * @param t  The token to check.
 * @return   True if @a t is a bc keyword, false otherwise.
 */
#define BC_PARSE_IS_KEYWORD(t) (BC_LEX_KW_AUTO <= (t) && (t) <= BC_LEX_KW_ELSE)
351 
/// A struct that holds data about what tokens should be expected next. There
/// are a few instances of these, all named because they are used in specific
/// cases. Basically, in certain situations, it's useful to use the same code,
/// but have a list of valid tokens.
///
/// Obviously, @a len is the number of tokens in the @a tokens array. If more
/// than 4 is needed in the future, @a tokens will have to be changed.
///
/// Instances can be built with BC_PARSE_NEXT().
typedef struct BcParseNext
{
	/// The number of tokens in the tokens array.
	uchar len;

	/// The tokens that can be expected next. There is room for 4 at most.
	uchar tokens[4];

} BcParseNext;
368 
/// Builds the designated initializer for the tokens array of a BcParseNext
/// from a parameter list.
#define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }

/// Builds a BcParseNext literal from the number of tokens followed by the
/// tokens themselves. The number is cast to uchar. See src/data.c for
/// examples.
#define BC_PARSE_NEXT(count, ...) \
	{ .len = (uchar) (count), .tokens = { __VA_ARGS__ } }
376 
/// The status that @a bc_parse_expr_err() returns. It is either success or an
/// error that indicates an empty expression.
typedef enum BcParseStatus
{
	/// The expression was parsed.
	BC_PARSE_STATUS_SUCCESS = 0,

	/// The expression was empty.
	BC_PARSE_STATUS_EMPTY_EXPR = 1,

} BcParseStatus;
385 
/**
 * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
 * @a BcParseExpr.) It has no return value.
 * @param p      The parser.
 * @param flags  Flags that define the requirements that the parsed code must
 *               meet or an error will result. See @a BcParseExpr for more info.
 */
void
bc_parse_expr(BcParse* p, uint8_t flags);
395 
/**
 * The @a BcParseParse function for bc. (See include/parse.h for a definition of
 * @a BcParseParse.) It has no return value.
 * @param p  The parser.
 */
void
bc_parse_parse(BcParse* p);
403 
/**
 * Ends a series of if statements. This is to ensure that full parses happen
 * when a file finishes or before defining a function. Without this, bc thinks
 * that it cannot parse any further. But if we reach the end of a file or a
 * function definition, we know we can add an empty else clause.
 *
 * See also @a BC_PARSE_FLAG_IF_END, the flag that marks a finished if
 * statement that may still be followed by an else.
 * @param p  The parser.
 */
void
bc_parse_endif(BcParse* p);
413 
/// References to the signal message and its length.
extern const char bc_sig_msg[];
extern const uchar bc_sig_msg_len;

/// A reference to an array of bits that are set if the corresponding lex token
/// is valid in an expression. Each element covers 8 tokens; it is read with
/// BC_PARSE_EXPR() and its elements can be built with BC_PARSE_EXPR_ENTRY().
extern const uint8_t bc_parse_exprs[];

/// A reference to an array of bc operators. It is indexed by lex token
/// relative to BC_LEX_OP_INC (see BC_PARSE_OP_DATA()), and each element packs
/// a precedence and an associativity bit (see BC_PARSE_OP()).
extern const uchar bc_parse_ops[];
424 
// References to the various instances of BcParseNext's.

/// A reference to what tokens are valid as next tokens when parsing normal
/// expressions. More accurately, these are the tokens that are valid for
/// *ending* the expression.
extern const BcParseNext bc_parse_next_expr;

/// A reference to what tokens are valid as next tokens when parsing function
/// parameters (well, actually arguments).
extern const BcParseNext bc_parse_next_arg;

/// A reference to what tokens are valid as next tokens when parsing a print
/// statement.
extern const BcParseNext bc_parse_next_print;

/// A reference to what tokens are valid as next tokens when parsing things like
/// loop headers and builtin functions where the only thing expected is a right
/// paren.
///
/// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
/// include/parse.h). It refers to how POSIX only allows some operators as part
/// of the conditional of for loops, while loops, and if statements.
extern const BcParseNext bc_parse_next_rel;

/// A reference to what tokens are valid as next tokens when parsing an array
/// element expression.
extern const BcParseNext bc_parse_next_elem;

/// A reference to what tokens are valid as next tokens when parsing the first
/// two parts of a for loop header.
extern const BcParseNext bc_parse_next_for;

/// A reference to what tokens are valid as next tokens when parsing a read
/// expression.
extern const BcParseNext bc_parse_next_read;

/// A reference to what tokens are valid as next tokens when parsing a builtin
/// function with multiple arguments.
extern const BcParseNext bc_parse_next_builtin;
464 
465 #else // BC_ENABLED
466 
// If bc is not enabled, execution is always possible because dc has strict
// rules that ensure execution can always proceed safely. The argument is
// accepted so that callers look the same as in a bc build, but it is never
// evaluated.
#define BC_PARSE_NO_EXEC(p) (0)
470 
471 #endif // BC_ENABLED
472 
473 #endif // BC_BC_H
474