xref: /freebsd/contrib/bc/include/parse.h (revision d4eeb02986980bf33dd56c41ceb9fc5f180c0d47)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for bc's parser.
33  *
34  */
35 
36 #ifndef BC_PARSE_H
37 #define BC_PARSE_H
38 
39 #include <limits.h>
40 #include <stdbool.h>
41 #include <stdint.h>
42 
43 #include <status.h>
44 #include <vector.h>
45 #include <lex.h>
46 #include <lang.h>
47 
// The following are flags that can be passed to @a BcParseExpr functions. They
// define the requirements that the parsed expression must meet to not have an
// error thrown. Each flag occupies its own bit, so flags can be combined with
// bitwise OR.

/// A flag (bit 0) that requires that the expression is valid for conditionals
/// in for loops, while loops, and if statements. This is because POSIX requires
/// that certain operators are *only* used in those cases. It's whacked, but
/// that's how it is.
#define BC_PARSE_REL (UINTMAX_C(1) << 0)

/// A flag (bit 1) that requires that the expression is valid for a print
/// statement.
#define BC_PARSE_PRINT (UINTMAX_C(1) << 1)

/// A flag (bit 2) that requires that the expression does *not* have any
/// function call.
#define BC_PARSE_NOCALL (UINTMAX_C(1) << 2)

/// A flag (bit 3) that requires that the expression does *not* have a read()
/// expression.
#define BC_PARSE_NOREAD (UINTMAX_C(1) << 3)

/// A flag (bit 4) that *allows* (rather than requires) that an array appear in
/// the expression. This is mostly used for parameters in bc.
#define BC_PARSE_ARRAY (UINTMAX_C(1) << 4)

/// A flag (bit 5) that requires that the expression is not empty and returns a
/// value.
#define BC_PARSE_NEEDVAL (UINTMAX_C(1) << 5)
74 
/**
 * Returns true if the parser has been initialized. The check is a pointer
 * comparison between the parser's stored program reference and @a prg.
 * @param p    A pointer to the parser.
 * @param prg  A pointer to the program.
 * @return     True if @a p has been initialized, false otherwise.
 */
#define BC_PARSE_IS_INITED(p, prg) ((p)->prog == (prg))
82 
/**
 * Returns true if the current parser state allows parsing, false otherwise.
 * Parsing cannot proceed when the lexer's current token is BC_LEX_EOF or, when
 * bc is enabled, BC_LEX_KW_DEFINE.
 *
 * Note that, unlike the other macros in this header, this one takes the parser
 * itself (an lvalue), *not* a pointer to it. In the bc-enabled variant, @a p is
 * evaluated twice, so it must not have side effects.
 * @param p  The parser (not a pointer).
 * @return   True if parsing can proceed, false otherwise.
 */
#if BC_ENABLED

#define BC_PARSE_CAN_PARSE(p) \
	((p).l.t != BC_LEX_EOF && (p).l.t != BC_LEX_KW_DEFINE)

#else // BC_ENABLED

#define BC_PARSE_CAN_PARSE(p) ((p).l.t != BC_LEX_EOF)

#endif // BC_ENABLED
103 
/**
 * Pushes the instruction @a i onto the bytecode vector for the current
 * function. The instruction is cast to uchar before it is pushed, so only its
 * low byte is stored.
 * @param p  A pointer to the parser.
 * @param i  The instruction to push onto the bytecode vector.
 */
#define bc_parse_push(p, i) (bc_vec_pushByte(&(p)->func->code, (uchar) (i)))
111 
/**
 * Pushes an index onto the bytecode vector for the current function. For more
 * information, see @a bc_vec_pushIndex() in src/vector.c and
 * @a bc_program_index() in src/program.c.
 * @param p    A pointer to the parser.
 * @param idx  The index to push onto the bytecode vector.
 */
#define bc_parse_pushIndex(p, idx) (bc_vec_pushIndex(&(p)->func->code, (idx)))
120 
/**
 * A convenience macro for throwing errors in parse code. It takes care of
 * plumbing like passing in the current line the lexer is on.
 * @param p  A pointer to the parser.
 * @param e  The error.
 */
#define bc_parse_err(p, e) (bc_vm_handleError((e), (p)->l.line))
128 
/**
 * A convenience macro for throwing errors that take extra arguments in parse
 * code. It takes care of plumbing like passing in the current line the lexer
 * is on. At least one variadic argument must be supplied; use @a bc_parse_err
 * when there are none.
 * @param p    A pointer to the parser.
 * @param e    The error.
 * @param ...  The varargs that are needed.
 */
#define bc_parse_verr(p, e, ...) \
	(bc_vm_handleError((e), (p)->l.line, __VA_ARGS__))
138 
// Forward declarations, so that the function pointer types below can refer to
// the parser and program structs before they are defined.
struct BcParse;
struct BcProgram;

/**
 * A function pointer to call when more parsing is needed.
 * @param p  The parser.
 */
typedef void (*BcParseParse)(struct BcParse* p);

/**
 * A function pointer to call when an expression needs to be parsed. This can
 * happen for read() expressions or dc strings.
 * @param p      The parser.
 * @param flags  The flags for what is allowed or required. (See the
 *               BC_PARSE_* flags above.)
 */
typedef void (*BcParseExpr)(struct BcParse* p, uint8_t flags);
156 
157 /// The parser struct.
158 typedef struct BcParse
159 {
160 	/// The lexer.
161 	BcLex l;
162 
163 #if BC_ENABLED
164 	/// The stack of flags for bc. (See comments in include/bc.h.) This stack is
165 	/// *required* to have one item at all times. Not maintaining that invariant
166 	/// will cause problems.
167 	BcVec flags;
168 
169 	/// The stack of exits. These are indices into the bytecode vector where
170 	/// blocks for loops and if statements end. Basically, these are the places
171 	/// to jump to when skipping code.
172 	BcVec exits;
173 
174 	/// The stack of conditionals. Unlike exits, which are indices to jump
175 	/// *forward* to, this is a vector of indices to jump *backward* to, usually
176 	/// to the conditional of a loop, hence the name.
177 	BcVec conds;
178 
179 	/// A stack of operators. When parsing expressions, the bc parser uses the
180 	/// Shunting-Yard algorithm, which requires a stack of operators. This can
181 	/// hold the stack for multiple expressions at once because the expressions
182 	/// stack as well. For more information, see the Expression Parsing section
183 	/// of the Development manual (manuals/development.md).
184 	BcVec ops;
185 
186 	/// A buffer to temporarily store a string in. This is because the lexer
187 	/// might generate a string as part of its work, and the parser needs that
188 	/// string, but it also needs the lexer to continue lexing, which might
189 	/// overwrite the string stored in the lexer. This buffer is for copying
190 	/// that string from the lexer to keep it safe.
191 	BcVec buf;
192 #endif // BC_ENABLED
193 
194 	/// A reference to the program to grab the current function when necessary.
195 	struct BcProgram* prog;
196 
197 	/// A reference to the current function. The function is what holds the
198 	/// bytecode vector that the parser is filling.
199 	BcFunc* func;
200 
201 	/// The index of the function.
202 	size_t fidx;
203 
204 #if BC_ENABLED
205 	/// True if the bc parser just entered a function and an auto statement
206 	/// would be valid.
207 	bool auto_part;
208 #endif // BC_ENABLED
209 
210 } BcParse;
211 
212 /**
213  * Initializes a parser.
214  * @param p     The parser to initialize.
215  * @param prog  A referenc to the program.
216  * @param func  The index of the current function.
217  */
218 void
219 bc_parse_init(BcParse* p, struct BcProgram* prog, size_t func);
220 
221 /**
222  * Frees a parser. This is not guarded by #ifndef NDEBUG because a separate
223  * parser is created at runtime to parse read() expressions and dc strings.
224  * @param p  The parser to free.
225  */
226 void
227 bc_parse_free(BcParse* p);
228 
229 /**
230  * Resets the parser. Resetting means erasing all state to the point that the
231  * parser would think it was just initialized.
232  * @param p  The parser to reset.
233  */
234 void
235 bc_parse_reset(BcParse* p);
236 
237 /**
238  * Adds a string. See @a BcProgram in include/program.h for more details.
239  * @param p  The parser that parsed the string.
240  */
241 void
242 bc_parse_addString(BcParse* p);
243 
244 /**
245  * Adds a number. See @a BcProgram in include/program.h for more details.
246  * @param p  The parser that parsed the number.
247  */
248 void
249 bc_parse_number(BcParse* p);
250 
251 /**
252  * Update the current function in the parser.
253  * @param p     The parser.
254  * @param fidx  The index of the new function.
255  */
256 void
257 bc_parse_updateFunc(BcParse* p, size_t fidx);
258 
259 /**
260  * Adds a new variable or array. See @a BcProgram in include/program.h for more
261  * details.
262  * @param p     The parser that parsed the variable or array name.
263  * @param name  The name of the variable or array to add.
264  * @param var   True if the name is for a variable, false if it's for an array.
265  */
266 void
267 bc_parse_pushName(const BcParse* p, char* name, bool var);
268 
269 /**
270  * Sets the text that the parser will parse.
271  * @param p         The parser.
272  * @param text      The text to lex.
273  * @param is_stdin  True if the text is from stdin, false otherwise.
274  * @param is_exprs  True if the text is from command-line expressions, false
275  *                  otherwise.
276  */
277 void
278 bc_parse_text(BcParse* p, const char* text, bool is_stdin, bool is_exprs);
279 
// References to constant "0" and "1" strings for special cases. bc and dc have
// specific instructions for 0 and 1 because they pop up so often and, in the
// case of 1, because it is needed for the increment/decrement operators. Each
// array has room for one character plus the terminating nul byte.
extern const char bc_parse_zero[2];
extern const char bc_parse_one[2];
285 
286 #endif // BC_PARSE_H
287