xref: /freebsd/contrib/bc/include/bc.h (revision b4a58fbf640409a1e507d9f7b411c83a3f83a2f3)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for bc only.
33  *
34  */
35 
36 #ifndef BC_BC_H
37 #define BC_BC_H
38 
39 #if BC_ENABLED
40 
41 #include <limits.h>
42 #include <stdbool.h>
43 
44 #include <status.h>
45 #include <lex.h>
46 #include <parse.h>
47 
/**
 * The main function for bc. It just sets variables and passes its arguments
 * through to @a bc_vm_boot().
 * @param argc  The argument count, passed through to @a bc_vm_boot().
 * @param argv  The argument vector, passed through to @a bc_vm_boot().
 */
void bc_main(int argc, char *argv[]);
53 
/// A reference to the help text.
extern const char bc_help[];

/// A reference to the library text.
extern const char bc_lib[];

/// A reference to the "filename" for the library.
extern const char *bc_lib_name;

#if BC_ENABLE_EXTRA_MATH

/// A reference to the text of the second math library.
extern const char bc_lib2[];

/// A reference to the "filename" for the second math library.
extern const char *bc_lib2_name;

#endif // BC_ENABLE_EXTRA_MATH
65 
66 /**
67  * A struct containing information about a bc keyword.
68  */
69 typedef struct BcLexKeyword {
70 
71 	/// Holds the length of the keyword along with a bit that, if set, means the
72 	/// keyword is used in POSIX bc.
73 	uchar data;
74 
75 	/// The keyword text.
76 	const char name[14];
77 } BcLexKeyword;
78 
/// Shifts @a bit into the most significant bit of a char. Used for setting the
/// POSIX bit in BcLexKeyword's data field.
#define BC_LEX_CHAR_MSB(bit) ((bit) << (CHAR_BIT - 1))

/// Returns non-zero if the keyword @a kw (a pointer to a BcLexKeyword) is
/// POSIX, zero otherwise.
#define BC_LEX_KW_POSIX(kw) ((kw)->data & (BC_LEX_CHAR_MSB(1)))

/// Returns the length of the keyword @a kw (a pointer to a BcLexKeyword), with
/// the POSIX bit masked off.
#define BC_LEX_KW_LEN(kw) ((size_t) ((kw)->data & ~(BC_LEX_CHAR_MSB(1))))

/// A macro to easily build a keyword entry from the keyword text @a a, its
/// length @a b, and its POSIX bit @a c. See bc_lex_kws in src/data.c.
#define BC_LEX_KW_ENTRY(a, b, c) \
	{ .data = ((b) & ~(BC_LEX_CHAR_MSB(1))) | BC_LEX_CHAR_MSB(c), .name = a }
92 
/// A macro for the number of keywords bc has, which depends on whether the
/// extra math features are enabled. This has to be updated if any keywords are
/// added. This is for the redefined_kws field of the BcVm struct.
#if BC_ENABLE_EXTRA_MATH
#define BC_LEX_NKWS (35)
#else // BC_ENABLE_EXTRA_MATH
#define BC_LEX_NKWS (31)
#endif // BC_ENABLE_EXTRA_MATH
106 
107 // The array of keywords and its length.
108 extern const BcLexKeyword bc_lex_kws[];
109 extern const size_t bc_lex_kws_len;
110 
111 /**
112  * The @a BcLexNext function for bc. (See include/lex.h for a definition of
113  * @a BcLexNext.)
114  * @param l  The lexer.
115  */
116 void bc_lex_token(BcLex *l);
117 
// The following section is for flags needed when parsing bc code. These flags
// are complicated, but necessary. Why, you ask? Because bc's standard is awful.
//
// If you don't believe me, go read the bc Parsing section of the Development
// manual (manuals/development.md). Then come back.
//
// In other words, these flags are the sign declaring, "Here be dragons."

/**
 * This returns a pointer to the set of flags at the top of the flag stack.
 * @a p is expected to be a BcParse pointer.
 * @param p  The parser.
 * @return   A pointer to the top flag set.
 */
#define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t *) bc_vec_top(&(p)->flags))

/**
 * This returns the flag set at the top of the flag stack. @a p is expected to
 * be a BcParse pointer.
 * @param p  The parser.
 * @return   The top flag set.
 */
#define BC_PARSE_TOP_FLAG(p) (*(BC_PARSE_TOP_FLAG_PTR(p)))
141 
// After this point, all flag #defines are in sets of 2: one to define the flag,
// and one to define a way to grab the flag from the flag set at the top of the
// flag stack. All `p` arguments are pointers to a BcParse.

// This flag is set if the parser has seen a left brace.
#define BC_PARSE_FLAG_BRACE (UINTMAX_C(1) << 0)
#define BC_PARSE_BRACE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BRACE)

// This flag is set if the parser is parsing inside of the braces of a function
// body.
#define BC_PARSE_FLAG_FUNC_INNER (UINTMAX_C(1) << 1)
#define BC_PARSE_FUNC_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC_INNER)

// This flag is set if the parser is parsing a function. It is different from
// the one above because it is set if it is parsing a function body *or* header,
// not just if it's parsing a function body.
#define BC_PARSE_FLAG_FUNC (UINTMAX_C(1) << 2)
#define BC_PARSE_FUNC(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC)

// This flag is set if the parser is expecting to parse a body, whether of a
// function, an if statement, or a loop.
#define BC_PARSE_FLAG_BODY (UINTMAX_C(1) << 3)
#define BC_PARSE_BODY(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BODY)

// This flag is set if bc is parsing a loop. This is important because the break
// and continue keywords are only valid inside of a loop.
#define BC_PARSE_FLAG_LOOP (UINTMAX_C(1) << 4)
#define BC_PARSE_LOOP(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP)

// This flag is set if bc is parsing the body of a loop. It is different from
// the one above the same way @a BC_PARSE_FLAG_FUNC_INNER is different from
// @a BC_PARSE_FLAG_FUNC.
#define BC_PARSE_FLAG_LOOP_INNER (UINTMAX_C(1) << 5)
#define BC_PARSE_LOOP_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP_INNER)

// This flag is set if bc is parsing an if statement.
#define BC_PARSE_FLAG_IF (UINTMAX_C(1) << 6)
#define BC_PARSE_IF(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF)

// This flag is set if bc is parsing an else statement. This is important
// because of "else if" constructions, among other things.
#define BC_PARSE_FLAG_ELSE (UINTMAX_C(1) << 7)
#define BC_PARSE_ELSE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_ELSE)

// This flag is set if bc just finished parsing an if statement and its body.
// It tells the parser that it can probably expect an else statement next. This
// flag is, thus, one of the most subtle.
#define BC_PARSE_FLAG_IF_END (UINTMAX_C(1) << 8)
#define BC_PARSE_IF_END(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF_END)
191 
/**
 * This returns true if bc is in a state where it should not execute any code
 * at all. That is the case unless the flag stack holds exactly one flag set
 * and that flag set has no flags set.
 * @param p  The parser.
 * @return   True if execution cannot proceed, false otherwise.
 */
#define BC_PARSE_NO_EXEC(p) ((p)->flags.len != 1 || BC_PARSE_TOP_FLAG(p) != 0)
199 
/**
 * This returns true if the token @a t is a statement delimiter, which is a
 * semicolon, a newline, or the end of the file.
 * @param t  The token to check.
 * @return   True if t is a statement delimiter token; false otherwise.
 */
#define BC_PARSE_DELIMITER(t) \
	((t) == BC_LEX_SCOLON || (t) == BC_LEX_NLINE || (t) == BC_LEX_EOF)
208 
/**
 * This is poorly named, but it basically returns whether or not the current
 * state is valid for the end of an else statement. Concretely, it is non-zero
 * if @a f has BC_PARSE_FLAG_ELSE or BC_PARSE_FLAG_LOOP_INNER set.
 * @param f  The flag set to be checked.
 * @return   True if the state is valid for the end of an else statement.
 */
#define BC_PARSE_BLOCK_STMT(f) \
	((f) & (BC_PARSE_FLAG_ELSE | BC_PARSE_FLAG_LOOP_INNER))
217 
/**
 * This returns the value of the data for an operator with precedence @a p and
 * associativity @a l (true if left associative, false otherwise). The
 * associativity goes in the most significant bit and the precedence in the
 * remaining bits. This is used to construct an array of operators,
 * bc_parse_ops, in src/data.c.
 * @param p  The precedence.
 * @param l  True if the operator is left associative, false otherwise.
 * @return   The data for the operator.
 */
#define BC_PARSE_OP(p, l) (((p) & ~(BC_LEX_CHAR_MSB(1))) | (BC_LEX_CHAR_MSB(l)))

/**
 * Returns the operator data for the lex token @a t. The bc_parse_ops array is
 * indexed relative to BC_LEX_OP_INC.
 * @param t  The token to return operator data for.
 * @return   The operator data for @a t.
 */
#define BC_PARSE_OP_DATA(t) (bc_parse_ops[((t) - BC_LEX_OP_INC)])

/**
 * Returns non-zero if operator @a op is left associative, zero otherwise.
 * @param op  The operator to test for associativity.
 * @return    Non-zero if the operator is left associative, zero otherwise.
 */
#define BC_PARSE_OP_LEFT(op) (BC_PARSE_OP_DATA(op) & BC_LEX_CHAR_MSB(1))

/**
 * Returns the precedence of operator @a op. Lower number means higher
 * precedence.
 * @param op  The operator to return the precedence of.
 * @return    The precedence of @a op.
 */
#define BC_PARSE_OP_PREC(op) (BC_PARSE_OP_DATA(op) & ~(BC_LEX_CHAR_MSB(1)))
249 
/**
 * A macro to easily define a series of bits for whether a lex token is an
 * expression token or not. It takes 8 expression bits, corresponding to the 8
 * bits in a uint8_t; @a e1 lands in the most significant bit and @a e8 in the
 * least significant. Because each argument is handed to UINTMAX_C(), it must
 * expand to an unsuffixed integer constant. You can see this in use for
 * bc_parse_exprs in src/data.c.
 * @param e1  The first bit.
 * @param e2  The second bit.
 * @param e3  The third bit.
 * @param e4  The fourth bit.
 * @param e5  The fifth bit.
 * @param e6  The sixth bit.
 * @param e7  The seventh bit.
 * @param e8  The eighth bit.
 * @return    An expression entry for bc_parse_exprs[].
 */
#define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8) \
	((UINTMAX_C(e1) << 7) | (UINTMAX_C(e2) << 6) | (UINTMAX_C(e3) << 5) | \
	 (UINTMAX_C(e4) << 4) | (UINTMAX_C(e5) << 3) | (UINTMAX_C(e6) << 2) | \
	 (UINTMAX_C(e7) << 1) | (UINTMAX_C(e8) << 0))
268 
/**
 * Returns true if token @a i is a token that belongs in an expression. Each
 * byte of bc_parse_exprs covers 8 tokens: the byte is selected by @a i divided
 * by 8, and within the byte the lowest-numbered token uses the most
 * significant bit (matching the layout of BC_PARSE_EXPR_ENTRY).
 * @param i  The token to test.
 * @return   True if i is an expression token, false otherwise.
 */
#define BC_PARSE_EXPR(i) \
	(bc_parse_exprs[(((i) & (uchar) ~(0x07)) >> 3)] & (1 << (7 - ((i) & 0x07))))
276 
/**
 * Returns the operator (by lex token) that is at the top of the operator
 * stack.
 * @param p  The parser.
 * @return   The operator that is at the top of the operator stack, as a lex
 *           token.
 */
#define BC_PARSE_TOP_OP(p) (*((BcLexType *) bc_vec_top(&(p)->ops)))
285 
/**
 * Returns true if bc has a "leaf" token. A "leaf" token is one that can stand
 * alone in an expression. For example, a number by itself can be an expression,
 * but a binary operator, while valid for an expression, cannot be alone in the
 * expression. It must have an expression to the left and right of itself. See
 * the documentation for @a bc_parse_expr_err() in src/bc_parse.c.
 * @param prev      The previous token as an instruction.
 * @param bin_last  True if the last operator was a binary operator, false
 *                  otherwise.
 * @param rparen    True if the last operator was a right paren.
 * @return          True if the last token was a leaf token, false otherwise.
 */
#define BC_PARSE_LEAF(prev, bin_last, rparen) \
	(!(bin_last) && ((rparen) || bc_parse_inst_isLeaf(prev)))
300 
/**
 * This returns true if the token @a t should be treated as though it's a
 * variable. This goes for actual variables, array elements, and globals. The
 * upper end of the accepted instruction range depends on whether the extra
 * math features are enabled; BC_INST_ARRAY is always excluded.
 * @param t  The token to test.
 * @return   True if @a t should be treated as though it's a variable, false
 *           otherwise.
 */
#if BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SEED && (t) != BC_INST_ARRAY)
#else // BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SCALE && (t) != BC_INST_ARRAY)
#endif // BC_ENABLE_EXTRA_MATH
315 
/**
 * Returns true if the previous token @a p (in the form of a bytecode
 * instruction) is a prefix operator, i.e., it lies in the instruction range
 * from BC_INST_NEG through BC_INST_BOOL_NOT. The fact that it is for bytecode
 * instructions is what makes it different from @a BC_PARSE_OP_PREFIX below.
 * @param p  The previous token.
 * @return   True if @a p is a prefix operator.
 */
#define BC_PARSE_PREV_PREFIX(p) ((p) >= BC_INST_NEG && (p) <= BC_INST_BOOL_NOT)
324 
/**
 * Returns true if token @a t is a prefix operator, i.e., it is either
 * BC_LEX_OP_BOOL_NOT or BC_LEX_NEG.
 * @param t  The token to test.
 * @return   True if @a t is a prefix operator, false otherwise.
 */
#define BC_PARSE_OP_PREFIX(t) ((t) == BC_LEX_OP_BOOL_NOT || (t) == BC_LEX_NEG)
331 
/**
 * We can calculate the conversion between tokens and bytecode instructions by
 * subtracting the position of the first operator in the lex enum and adding the
 * position of the first in the instruction enum. Note: This only works for
 * binary operators.
 * @param t  The token to turn into an instruction.
 * @return   The token as an instruction, cast to uchar.
 */
#define BC_PARSE_TOKEN_INST(t) ((uchar) ((t) - BC_LEX_NEG + BC_INST_NEG))
341 
/**
 * Returns true if the token is a bc keyword, i.e., it lies in the token range
 * from BC_LEX_KW_AUTO through BC_LEX_KW_ELSE.
 * @param t  The token to check.
 * @return   True if @a t is a bc keyword, false otherwise.
 */
#define BC_PARSE_IS_KEYWORD(t) ((t) >= BC_LEX_KW_AUTO && (t) <= BC_LEX_KW_ELSE)
348 
349 /// A struct that holds data about what tokens should be expected next. There
350 /// are a few instances of these, all named because they are used in specific
351 /// cases. Basically, in certain situations, it's useful to use the same code,
352 /// but have a list of valid tokens.
353 ///
354 /// Obviously, @a len is the number of tokens in the @a tokens array. If more
355 /// than 4 is needed in the future, @a tokens will have to be changed.
356 typedef struct BcParseNext {
357 
358 	/// The number of tokens in the tokens array.
359 	uchar len;
360 
361 	/// The tokens that can be expected next.
362 	uchar tokens[4];
363 
364 } BcParseNext;
365 
/// A macro to construct the designated initializer for the tokens array of a
/// BcParseNext from a parameter list.
#define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }

/// A macro to generate a BcParseNext initializer from the number of tokens
/// @a a followed by the tokens themselves. See src/data.c for examples.
#define BC_PARSE_NEXT(a, ...) \
	{ .len = (uchar) (a), BC_PARSE_NEXT_TOKENS(__VA_ARGS__) }
373 
/// A status returned by @a bc_parse_expr_err(). It can either return success or
/// an error indicating an empty expression.
typedef enum BcParseStatus {

	/// The expression was parsed successfully.
	BC_PARSE_STATUS_SUCCESS,

	/// The expression was empty.
	BC_PARSE_STATUS_EMPTY_EXPR,

} BcParseStatus;
382 
383 /**
384  * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
385  * @a BcParseExpr.)
386  * @param p      The parser.
387  * @param flags  Flags that define the requirements that the parsed code must
388  *               meet or an error will result. See @a BcParseExpr for more info.
389  */
390 void bc_parse_expr(BcParse *p, uint8_t flags);
391 
392 /**
393  * The @a BcParseParse function for bc. (See include/parse.h for a definition of
394  * @a BcParseParse.)
395  * @param p  The parser.
396  */
397 void bc_parse_parse(BcParse *p);
398 
399 /**
400  * Ends a series of if statements. This is to ensure that full parses happen
401  * when a file finishes or before defining a function. Without this, bc thinks
402  * that it cannot parse any further. But if we reach the end of a file or a
403  * function definition, we know we can add an empty else clause.
404  * @param p  The parser.
405  */
406 void bc_parse_endif(BcParse *p);
407 
408 /// References to the signal message and its length.
409 extern const char bc_sig_msg[];
410 extern const uchar bc_sig_msg_len;
411 
412 /// A reference to an array of bits that are set if the corresponding lex token
413 /// is valid in an expression.
414 extern const uint8_t bc_parse_exprs[];
415 
416 /// A reference to an array of bc operators.
417 extern const uchar bc_parse_ops[];
418 
419 // References to the various instances of BcParseNext's.
420 
421 /// A reference to what tokens are valid as next tokens when parsing normal
422 /// expressions. More accurately. these are the tokens that are valid for
423 /// *ending* the expression.
424 extern const BcParseNext bc_parse_next_expr;
425 
426 /// A reference to what tokens are valid as next tokens when parsing function
427 /// parameters (well, actually arguments).
428 extern const BcParseNext bc_parse_next_arg;
429 
430 /// A reference to what tokens are valid as next tokens when parsing a print
431 /// statement.
432 extern const BcParseNext bc_parse_next_print;
433 
434 /// A reference to what tokens are valid as next tokens when parsing things like
435 /// loop headers and builtin functions where the only thing expected is a right
436 /// paren.
437 ///
438 /// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
439 /// include/parse.h). It refers to how POSIX only allows some operators as part
440 /// of the conditional of for loops, while loops, and if statements.
441 extern const BcParseNext bc_parse_next_rel;
442 
443 // What tokens are valid as next tokens when parsing an array element
444 // expression.
445 extern const BcParseNext bc_parse_next_elem;
446 
447 /// A reference to what tokens are valid as next tokens when parsing the first
448 /// two parts of a for loop header.
449 extern const BcParseNext bc_parse_next_for;
450 
451 /// A reference to what tokens are valid as next tokens when parsing a read
452 /// expression.
453 extern const BcParseNext bc_parse_next_read;
454 
455 /// A reference to what tokens are valid as next tokens when parsing a builtin
456 /// function with multiple arguments.
457 extern const BcParseNext bc_parse_next_builtin;
458 
459 #else // BC_ENABLED
460 
// If bc is not enabled, execution is always possible because dc has strict
// rules that ensure execution can always proceed safely. The argument is
// accepted for compatibility with the bc version of the macro, but it is not
// evaluated.
#define BC_PARSE_NO_EXEC(p) (0)
464 
465 #endif // BC_ENABLED
466 
467 #endif // BC_BC_H
468