/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2024 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Definitions for bc only.
 *
 */

#ifndef BC_BC_H
#define BC_BC_H

#if BC_ENABLED

#include <limits.h>
#include <stdbool.h>

#include <status.h>
#include <lex.h>
#include <parse.h>

/**
 * The main function for bc. It just sets variables and passes its arguments
 * through to @a bc_vm_boot().
 * @param argc  The number of entries in @a argv.
 * @param argv  The argument array.
 * @return      A status.
 */
BcStatus
bc_main(int argc, const char* argv[]);

// These are references to the help text, the library text, and the "filename"
// for the library.
extern const char bc_help[];
extern const char bc_lib[];
extern const char* bc_lib_name;

// These are references to the second math library and its "filename."
#if BC_ENABLE_EXTRA_MATH
extern const char bc_lib2[];
extern const char* bc_lib2_name;
#endif // BC_ENABLE_EXTRA_MATH

/**
 * A struct containing information about a bc keyword.
 */
typedef struct BcLexKeyword
{
	/// Holds the length of the keyword along with a bit that, if set, means the
	/// keyword is used in POSIX bc.
	uchar data;

	/// The keyword text.
	const char name[14];
} BcLexKeyword;

/// Sets the most significant bit. Used for setting the POSIX bit in
/// BcLexKeyword's data field.
#define BC_LEX_CHAR_MSB(bit) ((bit) << (CHAR_BIT - 1))

/// Returns non-zero if the keyword is POSIX, zero otherwise.
#define BC_LEX_KW_POSIX(kw) ((kw)->data & (BC_LEX_CHAR_MSB(1)))

/// Returns the length of the keyword.
#define BC_LEX_KW_LEN(kw) ((size_t) ((kw)->data & ~(BC_LEX_CHAR_MSB(1))))

/// A macro to easily build a keyword entry. See bc_lex_kws in src/data.c.
#define BC_LEX_KW_ENTRY(a, b, c) \
	{ .data = ((b) & ~(BC_LEX_CHAR_MSB(1))) | BC_LEX_CHAR_MSB(c), .name = a }

#if BC_ENABLE_EXTRA_MATH

/// A macro for the number of keywords bc has. This has to be updated if any are
/// added. This is for the redefined_kws field of the BcVm struct.
#define BC_LEX_NKWS (37)

#else // BC_ENABLE_EXTRA_MATH

/// A macro for the number of keywords bc has. This has to be updated if any are
/// added. This is for the redefined_kws field of the BcVm struct.
#define BC_LEX_NKWS (33)

#endif // BC_ENABLE_EXTRA_MATH

// The array of keywords and its length.
extern const BcLexKeyword bc_lex_kws[];
extern const size_t bc_lex_kws_len;

/**
 * The @a BcLexNext function for bc. (See include/lex.h for a definition of
 * @a BcLexNext.)
 * @param l  The lexer.
 */
void
bc_lex_token(BcLex* l);

// The following section is for flags needed when parsing bc code. These flags
// are complicated, but necessary. Why you ask? Because bc's standard is awful.
//
// If you don't believe me, go read the bc Parsing section of the Development
// manual (manuals/development.md). Then come back.
//
// In other words, these flags are the sign declaring, "Here be dragons."

/**
 * This returns a pointer to the set of flags at the top of the flag stack.
 * @a p is expected to be a BcParse pointer.
 * @param p  The parser.
 * @return   A pointer to the top flag set.
 */
#define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t*) bc_vec_top(&(p)->flags))

/**
 * This returns the flag set at the top of the flag stack. @a p is expected to
 * be a BcParse pointer.
 * @param p  The parser.
 * @return   The top flag set.
 */
#define BC_PARSE_TOP_FLAG(p) (*(BC_PARSE_TOP_FLAG_PTR(p)))

// After this point, all flag #defines are in sets of 2: one to define the flag,
// and one to define a way to grab the flag from the flag set at the top of the
// flag stack. All `p` arguments are pointers to a BcParse.

// This flag is set if the parser has seen a left brace.
#define BC_PARSE_FLAG_BRACE (UINTMAX_C(1) << 0)
#define BC_PARSE_BRACE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BRACE)

// This flag is set if the parser is parsing inside of the braces of a function
// body.
#define BC_PARSE_FLAG_FUNC_INNER (UINTMAX_C(1) << 1)
#define BC_PARSE_FUNC_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC_INNER)

// This flag is set if the parser is parsing a function. It is different from
// the one above because it is set if it is parsing a function body *or* header,
// not just if it's parsing a function body.
#define BC_PARSE_FLAG_FUNC (UINTMAX_C(1) << 2)
#define BC_PARSE_FUNC(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC)

// This flag is set if the parser is expecting to parse a body, whether of a
// function, an if statement, or a loop.
#define BC_PARSE_FLAG_BODY (UINTMAX_C(1) << 3)
#define BC_PARSE_BODY(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BODY)

// This flag is set if bc is parsing a loop. This is important because the break
// and continue keywords are only valid inside of a loop.
#define BC_PARSE_FLAG_LOOP (UINTMAX_C(1) << 4)
#define BC_PARSE_LOOP(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP)

// This flag is set if bc is parsing the body of a loop. It is different from
// the one above the same way @a BC_PARSE_FLAG_FUNC_INNER is different from
// @a BC_PARSE_FLAG_FUNC.
#define BC_PARSE_FLAG_LOOP_INNER (UINTMAX_C(1) << 5)
#define BC_PARSE_LOOP_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP_INNER)

// This flag is set if bc is parsing an if statement.
#define BC_PARSE_FLAG_IF (UINTMAX_C(1) << 6)
#define BC_PARSE_IF(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF)

// This flag is set if bc is parsing an else statement. This is important
// because of "else if" constructions, among other things.
#define BC_PARSE_FLAG_ELSE (UINTMAX_C(1) << 7)
#define BC_PARSE_ELSE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_ELSE)

// This flag is set if bc just finished parsing an if statement and its body.
// It tells the parser that it can probably expect an else statement next. This
// flag is, thus, one of the most subtle.
#define BC_PARSE_FLAG_IF_END (UINTMAX_C(1) << 8)
#define BC_PARSE_IF_END(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF_END)

/**
 * This returns true if bc is in a state where it should not execute any code
 * at all. That is the case when the flag stack does not hold exactly one entry
 * or when the flag set at the top of the stack has any bit set.
 * @param p  The parser.
 * @return   True if execution cannot proceed, false otherwise.
 */
#define BC_PARSE_NO_EXEC(p) ((p)->flags.len != 1 || BC_PARSE_TOP_FLAG(p) != 0)

/**
 * This returns true if the token @a t is a statement delimiter, which is a
 * semicolon, a newline, or an EOF token.
 * @param t  The token to check.
 * @return   True if t is a statement delimiter token; false otherwise.
 */
#define BC_PARSE_DELIMITER(t) \
	((t) == BC_LEX_SCOLON || (t) == BC_LEX_NLINE || (t) == BC_LEX_EOF)

/**
 * This is poorly named, but it basically returns whether or not the current
 * state is valid for the end of an else statement.
 * @param f  The flag set to be checked.
 * @return   True if the state is valid for the end of an else statement.
 */
#define BC_PARSE_BLOCK_STMT(f) \
	((f) & (BC_PARSE_FLAG_ELSE | BC_PARSE_FLAG_LOOP_INNER))

/**
 * This returns the value of the data for an operator with precedence @a p and
 * associativity @a l (true if left associative, false otherwise). This is used
 * to construct an array of operators, bc_parse_ops, in src/data.c.
 * @param p  The precedence.
 * @param l  True if the operator is left associative, false otherwise.
 * @return   The data for the operator.
 */
#define BC_PARSE_OP(p, l) (((p) & ~(BC_LEX_CHAR_MSB(1))) | (BC_LEX_CHAR_MSB(l)))

/**
 * Returns the operator data for the lex token @a t.
 * @param t  The token to return operator data for.
 * @return   The operator data for @a t.
 */
#define BC_PARSE_OP_DATA(t) bc_parse_ops[((t) - BC_LEX_OP_INC)]

/**
 * Returns non-zero if operator @a op is left associative, zero otherwise.
 * @param op  The operator to test for associativity.
 * @return    Non-zero if the operator is left associative, zero otherwise.
 */
#define BC_PARSE_OP_LEFT(op) (BC_PARSE_OP_DATA(op) & BC_LEX_CHAR_MSB(1))

/**
 * Returns the precedence of operator @a op. Lower number means higher
 * precedence.
 * @param op  The operator to return the precedence of.
 * @return    The precedence of @a op.
 */
#define BC_PARSE_OP_PREC(op) (BC_PARSE_OP_DATA(op) & ~(BC_LEX_CHAR_MSB(1)))

/**
 * A macro to easily define a series of bits for whether a lex token is an
 * expression token or not. It takes 8 expression bits, corresponding to the 8
 * bits in a uint8_t. You can see this in use for bc_parse_exprs in src/data.c.
 * @param e1  The first bit.
 * @param e2  The second bit.
 * @param e3  The third bit.
 * @param e4  The fourth bit.
 * @param e5  The fifth bit.
 * @param e6  The sixth bit.
 * @param e7  The seventh bit.
 * @param e8  The eighth bit.
 * @return    An expression entry for bc_parse_exprs[].
 */
#define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8) \
	((UINTMAX_C(e1) << 7) | (UINTMAX_C(e2) << 6) | (UINTMAX_C(e3) << 5) | \
	 (UINTMAX_C(e4) << 4) | (UINTMAX_C(e5) << 3) | (UINTMAX_C(e6) << 2) | \
	 (UINTMAX_C(e7) << 1) | (UINTMAX_C(e8) << 0))

/**
 * Returns true if token @a i is a token that belongs in an expression.
 * @param i  The token to test.
 * @return   True if i is an expression token, false otherwise.
 */
#define BC_PARSE_EXPR(i) \
	(bc_parse_exprs[(((i) & (uchar) ~(0x07)) >> 3)] & (1 << (7 - ((i) & 0x07))))

/**
 * Returns the operator (by lex token) that is at the top of the operator
 * stack.
 * @param p  The parser.
 * @return   The operator that is at the top of the operator stack, as a lex
 *           token.
 */
#define BC_PARSE_TOP_OP(p) (*((BcLexType*) bc_vec_top(&(p)->ops)))

/**
 * Returns true if bc has a "leaf" token. A "leaf" token is one that can stand
 * alone in an expression. For example, a number by itself can be an expression,
 * but a binary operator, while valid for an expression, cannot be alone in the
 * expression. It must have an expression to the left and right of itself. See
 * the documentation for @a bc_parse_expr_err() in src/bc_parse.c.
 * @param prev      The previous token as an instruction.
 * @param bin_last  True if that last operator was a binary operator, false
 *                  otherwise.
 * @param rparen    True if the last operator was a right paren.
 * @return          True if the last token was a leaf token, false otherwise.
 */
#define BC_PARSE_LEAF(prev, bin_last, rparen) \
	(!(bin_last) && ((rparen) || bc_parse_inst_isLeaf(prev)))

/**
 * This returns true if the token @a t should be treated as though it's a
 * variable. This goes for actual variables, array elements, and globals.
 * @param t  The token to test.
 * @return   True if @a t should be treated as though it's a variable, false
 *           otherwise.
 */
#if BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SEED && (t) != BC_INST_ARRAY)
#else // BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SCALE && (t) != BC_INST_ARRAY)
#endif // BC_ENABLE_EXTRA_MATH

/**
 * Returns true if the previous token @a p (in the form of a bytecode
 * instruction) is a prefix operator. The fact that it is for bytecode
 * instructions is what makes it different from @a BC_PARSE_OP_PREFIX below.
 * @param p  The previous token.
 * @return   True if @a p is a prefix operator.
 */
#define BC_PARSE_PREV_PREFIX(p) ((p) >= BC_INST_NEG && (p) <= BC_INST_BOOL_NOT)

/**
 * Returns true if token @a t is a prefix operator.
 * @param t  The token to test.
 * @return   True if @a t is a prefix operator, false otherwise.
 */
#define BC_PARSE_OP_PREFIX(t) ((t) == BC_LEX_OP_BOOL_NOT || (t) == BC_LEX_NEG)

/**
 * We can calculate the conversion between tokens and bytecode instructions by
 * subtracting the position of the first operator in the lex enum and adding the
 * position of the first in the instruction enum. Note: This only works for
 * binary operators.
 * @param t  The token to turn into an instruction.
 * @return   The token as an instruction.
 */
#define BC_PARSE_TOKEN_INST(t) ((uchar) ((t) - BC_LEX_NEG + BC_INST_NEG))

/**
 * Returns true if the token is a bc keyword.
 * @param t  The token to check.
 * @return   True if @a t is a bc keyword, false otherwise.
 */
#define BC_PARSE_IS_KEYWORD(t) ((t) >= BC_LEX_KW_AUTO && (t) <= BC_LEX_KW_ELSE)

/// A struct that holds data about what tokens should be expected next. There
/// are a few instances of these, all named because they are used in specific
/// cases. Basically, in certain situations, it's useful to use the same code,
/// but have a list of valid tokens.
///
/// Obviously, @a len is the number of tokens in the @a tokens array. If more
/// than 4 is needed in the future, @a tokens will have to be changed.
typedef struct BcParseNext
{
	/// The number of tokens in the tokens array.
	uchar len;

	/// The tokens that can be expected next.
	uchar tokens[4];

} BcParseNext;

/// A macro to construct an array literal of tokens from a parameter list.
#define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }

/// A macro to generate a BcParseNext literal from BcParseNext data. See
/// src/data.c for examples.
#define BC_PARSE_NEXT(a, ...) \
	{ .len = (uchar) (a), BC_PARSE_NEXT_TOKENS(__VA_ARGS__) }

/// A status returned by @a bc_parse_expr_err(). It can either return success or
/// an error indicating an empty expression.
typedef enum BcParseStatus
{
	BC_PARSE_STATUS_SUCCESS,
	BC_PARSE_STATUS_EMPTY_EXPR,

} BcParseStatus;

/**
 * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
 * @a BcParseExpr.)
 * @param p      The parser.
 * @param flags  Flags that define the requirements that the parsed code must
 *               meet or an error will result. See @a BcParseExpr for more info.
 */
void
bc_parse_expr(BcParse* p, uint8_t flags);

/**
 * The @a BcParseParse function for bc. (See include/parse.h for a definition of
 * @a BcParseParse.)
 * @param p  The parser.
 */
void
bc_parse_parse(BcParse* p);

/**
 * Ends a series of if statements. This is to ensure that full parses happen
 * when a file finishes or before defining a function. Without this, bc thinks
 * that it cannot parse any further. But if we reach the end of a file or a
 * function definition, we know we can add an empty else clause.
 * @param p  The parser.
 */
void
bc_parse_endif(BcParse* p);

/// References to the signal message and its length.
extern const char bc_sig_msg[];
extern const uchar bc_sig_msg_len;

/// A reference to an array of bits that are set if the corresponding lex token
/// is valid in an expression.
extern const uint8_t bc_parse_exprs[];

/// A reference to an array of bc operators.
extern const uchar bc_parse_ops[];

// References to the various instances of BcParseNext's.

/// A reference to what tokens are valid as next tokens when parsing normal
/// expressions. More accurately, these are the tokens that are valid for
/// *ending* the expression.
extern const BcParseNext bc_parse_next_expr;

/// A reference to what tokens are valid as next tokens when parsing function
/// parameters (well, actually arguments).
extern const BcParseNext bc_parse_next_arg;

/// A reference to what tokens are valid as next tokens when parsing a print
/// statement.
extern const BcParseNext bc_parse_next_print;

/// A reference to what tokens are valid as next tokens when parsing things like
/// loop headers and builtin functions where the only thing expected is a right
/// paren.
///
/// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
/// include/parse.h). It refers to how POSIX only allows some operators as part
/// of the conditional of for loops, while loops, and if statements.
extern const BcParseNext bc_parse_next_rel;

/// A reference to what tokens are valid as next tokens when parsing an array
/// element expression.
extern const BcParseNext bc_parse_next_elem;

/// A reference to what tokens are valid as next tokens when parsing the first
/// two parts of a for loop header.
extern const BcParseNext bc_parse_next_for;

/// A reference to what tokens are valid as next tokens when parsing a read
/// expression.
extern const BcParseNext bc_parse_next_read;

/// A reference to what tokens are valid as next tokens when parsing a builtin
/// function with multiple arguments.
extern const BcParseNext bc_parse_next_builtin;

#else // BC_ENABLED

// If bc is not enabled, execution is always possible because dc has strict
// rules that ensure execution can always proceed safely.
#define BC_PARSE_NO_EXEC(p) (0)

#endif // BC_ENABLED

#endif // BC_BC_H