/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Definitions for bc only.
 *
 */

#ifndef BC_BC_H
#define BC_BC_H

#if BC_ENABLED

// <stddef.h> and <stdint.h> are included directly because this header itself
// uses size_t, uint8_t, uint16_t, and UINTMAX_C(); it should not depend on
// another header happening to pull them in.
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <status.h>
#include <lex.h>
#include <parse.h>

/**
 * The main function for bc. It just sets variables and passes its arguments
 * through to @a bc_vm_boot().
 * @param argc  The number of arguments.
 * @param argv  The arguments.
 */
void
bc_main(int argc, char* argv[]);

// These are references to the help text, the library text, and the "filename"
// for the library.
extern const char bc_help[];
extern const char bc_lib[];
extern const char* bc_lib_name;

// These are references to the second math library and its "filename."
#if BC_ENABLE_EXTRA_MATH
extern const char bc_lib2[];
extern const char* bc_lib2_name;
#endif // BC_ENABLE_EXTRA_MATH

/**
 * A struct containing information about a bc keyword.
 */
typedef struct BcLexKeyword
{
	/// Holds the length of the keyword along with a bit that, if set, means the
	/// keyword is used in POSIX bc.
	uchar data;

	/// The keyword text.
	const char name[14];
} BcLexKeyword;

/// Sets the most significant bit. Used for setting the POSIX bit in
/// BcLexKeyword's data field. @a bit is shifted into the top bit position of a
/// char (CHAR_BIT - 1).
#define BC_LEX_CHAR_MSB(bit) ((bit) << (CHAR_BIT - 1))

/// Returns non-zero if the keyword is POSIX, zero otherwise. @a kw is a pointer
/// to a BcLexKeyword.
#define BC_LEX_KW_POSIX(kw) ((kw)->data & (BC_LEX_CHAR_MSB(1)))

/// Returns the length of the keyword, i.e., the data field with the POSIX bit
/// masked off. @a kw is a pointer to a BcLexKeyword.
#define BC_LEX_KW_LEN(kw) ((size_t) ((kw)->data & ~(BC_LEX_CHAR_MSB(1))))

/// A macro to easily build a keyword entry. See bc_lex_kws in src/data.c.
/// @a a is the keyword text, @a b is the keyword length (stored in the low bits
/// of the data field), and @a c is the value of the POSIX bit.
#define BC_LEX_KW_ENTRY(a, b, c) \
	{ \
		.data = ((b) & ~(BC_LEX_CHAR_MSB(1))) | BC_LEX_CHAR_MSB(c), .name = a \
	}

#if BC_ENABLE_EXTRA_MATH

/// A macro for the number of keywords bc has. This has to be updated if any are
/// added. This is for the redefined_kws field of the BcVm struct.
#define BC_LEX_NKWS (37)

#else // BC_ENABLE_EXTRA_MATH

/// A macro for the number of keywords bc has. This has to be updated if any are
/// added. This is for the redefined_kws field of the BcVm struct.
#define BC_LEX_NKWS (33)

#endif // BC_ENABLE_EXTRA_MATH

// The array of keywords and its length.
extern const BcLexKeyword bc_lex_kws[];
extern const size_t bc_lex_kws_len;

/**
 * The @a BcLexNext function for bc. (See include/lex.h for a definition of
 * @a BcLexNext.)
 * @param l  The lexer.
 */
void
bc_lex_token(BcLex* l);

// The following section is for flags needed when parsing bc code. These flags
// are complicated, but necessary. Why you ask? Because bc's standard is awful.
//
// If you don't believe me, go read the bc Parsing section of the Development
// manual (manuals/development.md). Then come back.
//
// In other words, these flags are the sign declaring, "Here be dragons."

/**
 * This returns a pointer to the set of flags at the top of the flag stack.
 * @a p is expected to be a BcParse pointer.
 * @param p  The parser.
 * @return   A pointer to the top flag set.
 */
#define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t*) bc_vec_top(&(p)->flags))

/**
 * This returns the flag set at the top of the flag stack. @a p is expected to
 * be a BcParse pointer.
 * @param p  The parser.
 * @return   The top flag set.
 */
#define BC_PARSE_TOP_FLAG(p) (*(BC_PARSE_TOP_FLAG_PTR(p)))

// After this point, all flag #defines are in sets of 2: one to define the flag,
// and one to define a way to grab the flag from the flag set at the top of the
// flag stack. All `p` arguments are pointers to a BcParse.

// This flag is set if the parser has seen a left brace.
#define BC_PARSE_FLAG_BRACE (UINTMAX_C(1) << 0)
#define BC_PARSE_BRACE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BRACE)

// This flag is set if the parser is parsing inside of the braces of a function
// body.
#define BC_PARSE_FLAG_FUNC_INNER (UINTMAX_C(1) << 1)
#define BC_PARSE_FUNC_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC_INNER)

// This flag is set if the parser is parsing a function. It is different from
// the one above because it is set if it is parsing a function body *or* header,
// not just if it's parsing a function body.
#define BC_PARSE_FLAG_FUNC (UINTMAX_C(1) << 2)
#define BC_PARSE_FUNC(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC)

// This flag is set if the parser is expecting to parse a body, whether of a
// function, an if statement, or a loop.
#define BC_PARSE_FLAG_BODY (UINTMAX_C(1) << 3)
#define BC_PARSE_BODY(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BODY)

// This flag is set if bc is parsing a loop. This is important because the break
// and continue keywords are only valid inside of a loop.
#define BC_PARSE_FLAG_LOOP (UINTMAX_C(1) << 4)
#define BC_PARSE_LOOP(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP)

// This flag is set if bc is parsing the body of a loop. It is different from
// the one above the same way @a BC_PARSE_FLAG_FUNC_INNER is different from
// @a BC_PARSE_FLAG_FUNC.
#define BC_PARSE_FLAG_LOOP_INNER (UINTMAX_C(1) << 5)
#define BC_PARSE_LOOP_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP_INNER)

// This flag is set if bc is parsing an if statement.
#define BC_PARSE_FLAG_IF (UINTMAX_C(1) << 6)
#define BC_PARSE_IF(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF)

// This flag is set if bc is parsing an else statement. This is important
// because of "else if" constructions, among other things.
#define BC_PARSE_FLAG_ELSE (UINTMAX_C(1) << 7)
#define BC_PARSE_ELSE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_ELSE)

// This flag is set if bc just finished parsing an if statement and its body.
// It tells the parser that it can probably expect an else statement next. This
// flag is, thus, one of the most subtle.
#define BC_PARSE_FLAG_IF_END (UINTMAX_C(1) << 8)
#define BC_PARSE_IF_END(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF_END)

/**
 * This returns true if bc is in a state where it should not execute any code
 * at all. That is the case whenever the flag stack holds anything other than
 * exactly one flag set, or when the top flag set has any flag set.
 * @param p  The parser.
 * @return   True if execution cannot proceed, false otherwise.
 */
#define BC_PARSE_NO_EXEC(p) ((p)->flags.len != 1 || BC_PARSE_TOP_FLAG(p) != 0)

/**
 * This returns true if the token @a t is a statement delimiter, which is a
 * semicolon, a newline, or the BC_LEX_EOF token.
 * @param t  The token to check.
 * @return   True if t is a statement delimiter token; false otherwise.
 */
#define BC_PARSE_DELIMITER(t) \
	((t) == BC_LEX_SCOLON || (t) == BC_LEX_NLINE || (t) == BC_LEX_EOF)

/**
 * This is poorly named, but it basically returns whether or not the current
 * state is valid for the end of an else statement.
 * @param f  The flag set to be checked.
 * @return   True if the state is valid for the end of an else statement.
 */
#define BC_PARSE_BLOCK_STMT(f) \
	((f) & (BC_PARSE_FLAG_ELSE | BC_PARSE_FLAG_LOOP_INNER))

/**
 * This returns the value of the data for an operator with precedence @a p and
 * associativity @a l (true if left associative, false otherwise). This is used
 * to construct an array of operators, bc_parse_ops, in src/data.c.
 * @param p  The precedence.
 * @param l  True if the operator is left associative, false otherwise.
 * @return   The data for the operator.
 */
#define BC_PARSE_OP(p, l) (((p) & ~(BC_LEX_CHAR_MSB(1))) | (BC_LEX_CHAR_MSB(l)))

/**
 * Returns the operator data for the lex token @a t.
 * @param t  The token to return operator data for.
 * @return   The operator data for @a t.
 */
#define BC_PARSE_OP_DATA(t) bc_parse_ops[((t) - BC_LEX_OP_INC)]

/**
 * Returns non-zero if operator @a op is left associative, zero otherwise.
 * @param op  The operator to test for associativity.
 * @return    Non-zero if the operator is left associative, zero otherwise.
 */
#define BC_PARSE_OP_LEFT(op) (BC_PARSE_OP_DATA(op) & BC_LEX_CHAR_MSB(1))

/**
 * Returns the precedence of operator @a op. Lower number means higher
 * precedence.
 * @param op  The operator to return the precedence of.
 * @return    The precedence of @a op.
 */
#define BC_PARSE_OP_PREC(op) (BC_PARSE_OP_DATA(op) & ~(BC_LEX_CHAR_MSB(1)))

/**
 * A macro to easily define a series of bits for whether a lex token is an
 * expression token or not. It takes 8 expression bits, corresponding to the 8
 * bits in a uint8_t. You can see this in use for bc_parse_exprs in src/data.c.
 * @param e1  The first bit.
 * @param e2  The second bit.
 * @param e3  The third bit.
 * @param e4  The fourth bit.
 * @param e5  The fifth bit.
 * @param e6  The sixth bit.
 * @param e7  The seventh bit.
 * @param e8  The eighth bit.
 * @return    An expression entry for bc_parse_exprs[].
 */
#define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8) \
	((UINTMAX_C(e1) << 7) | (UINTMAX_C(e2) << 6) | (UINTMAX_C(e3) << 5) | \
	 (UINTMAX_C(e4) << 4) | (UINTMAX_C(e5) << 3) | (UINTMAX_C(e6) << 2) | \
	 (UINTMAX_C(e7) << 1) | (UINTMAX_C(e8) << 0))

/**
 * Returns true if token @a i is a token that belongs in an expression. The
 * upper bits of @a i select the byte in bc_parse_exprs[], and the low three
 * bits select the bit within that byte, most significant bit first.
 * @param i  The token to test.
 * @return   True if i is an expression token, false otherwise.
 */
#define BC_PARSE_EXPR(i) \
	(bc_parse_exprs[(((i) & (uchar) ~(0x07)) >> 3)] & (1 << (7 - ((i) & 0x07))))

/**
 * Returns the operator (by lex token) that is at the top of the operator
 * stack.
 * @param p  The parser.
 * @return   The operator that is at the top of the operator stack, as a lex
 *           token.
 */
#define BC_PARSE_TOP_OP(p) (*((BcLexType*) bc_vec_top(&(p)->ops)))

/**
 * Returns true if bc has a "leaf" token. A "leaf" token is one that can stand
 * alone in an expression. For example, a number by itself can be an expression,
 * but a binary operator, while valid for an expression, cannot be alone in the
 * expression. It must have an expression to the left and right of itself. See
 * the documentation for @a bc_parse_expr_err() in src/bc_parse.c.
 * @param prev      The previous token as an instruction.
 * @param bin_last  True if that last operator was a binary operator, false
 *                  otherwise.
 * @param rparen    True if the last operator was a right paren.
 * @return          True if the last token was a leaf token, false otherwise.
 */
#define BC_PARSE_LEAF(prev, bin_last, rparen) \
	(!(bin_last) && ((rparen) || bc_parse_inst_isLeaf(prev)))

/**
 * This returns true if the token @a t should be treated as though it's a
 * variable. This goes for actual variables, array elements, and globals.
 * @param t  The token to test.
 * @return   True if @a t should be treated as though it's a variable, false
 *           otherwise.
 */
#if BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SEED && (t) != BC_INST_ARRAY)
#else // BC_ENABLE_EXTRA_MATH
#define BC_PARSE_INST_VAR(t) \
	((t) >= BC_INST_VAR && (t) <= BC_INST_SCALE && (t) != BC_INST_ARRAY)
#endif // BC_ENABLE_EXTRA_MATH

/**
 * Returns true if the previous token @a p (in the form of a bytecode
 * instruction) is a prefix operator. The fact that it is for bytecode
 * instructions is what makes it different from @a BC_PARSE_OP_PREFIX below.
 * @param p  The previous token.
 * @return   True if @a p is a prefix operator.
 */
#define BC_PARSE_PREV_PREFIX(p) ((p) >= BC_INST_NEG && (p) <= BC_INST_BOOL_NOT)

/**
 * Returns true if token @a t is a prefix operator.
 * @param t  The token to test.
 * @return   True if @a t is a prefix operator, false otherwise.
 */
#define BC_PARSE_OP_PREFIX(t) ((t) == BC_LEX_OP_BOOL_NOT || (t) == BC_LEX_NEG)

/**
 * We can calculate the conversion between tokens and bytecode instructions by
 * subtracting the position of the first operator in the lex enum and adding the
 * position of the first in the instruction enum. Note: This only works for
 * binary operators.
 * @param t  The token to turn into an instruction.
 * @return   The token as an instruction.
 */
#define BC_PARSE_TOKEN_INST(t) ((uchar) ((t) - BC_LEX_NEG + BC_INST_NEG))

/**
 * Returns true if the token is a bc keyword.
 * @param t  The token to check.
 * @return   True if @a t is a bc keyword, false otherwise.
 */
#define BC_PARSE_IS_KEYWORD(t) ((t) >= BC_LEX_KW_AUTO && (t) <= BC_LEX_KW_ELSE)

/// A struct that holds data about what tokens should be expected next. There
/// are a few instances of these, all named because they are used in specific
/// cases. Basically, in certain situations, it's useful to use the same code,
/// but have a list of valid tokens.
///
/// Obviously, @a len is the number of tokens in the @a tokens array. If more
/// than 4 is needed in the future, @a tokens will have to be changed.
typedef struct BcParseNext
{
	/// The number of tokens in the tokens array.
	uchar len;

	/// The tokens that can be expected next.
	uchar tokens[4];

} BcParseNext;

/// A macro to construct an array literal of tokens from a parameter list.
#define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }

/// A macro to generate a BcParseNext literal from BcParseNext data. See
/// src/data.c for examples. @a a is the number of tokens, and the remaining
/// arguments are the tokens themselves.
#define BC_PARSE_NEXT(a, ...) \
	{ \
		.len = (uchar) (a), BC_PARSE_NEXT_TOKENS(__VA_ARGS__) \
	}

/// A status returned by @a bc_parse_expr_err(). It can either return success or
/// an error indicating an empty expression.
typedef enum BcParseStatus
{
	BC_PARSE_STATUS_SUCCESS,
	BC_PARSE_STATUS_EMPTY_EXPR,

} BcParseStatus;

/**
 * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
 * @a BcParseExpr.)
 * @param p      The parser.
 * @param flags  Flags that define the requirements that the parsed code must
 *               meet or an error will result. See @a BcParseExpr for more info.
 */
void
bc_parse_expr(BcParse* p, uint8_t flags);

/**
 * The @a BcParseParse function for bc. (See include/parse.h for a definition of
 * @a BcParseParse.)
 * @param p  The parser.
 */
void
bc_parse_parse(BcParse* p);

/**
 * Ends a series of if statements. This is to ensure that full parses happen
 * when a file finishes or before defining a function. Without this, bc thinks
 * that it cannot parse any further. But if we reach the end of a file or a
 * function definition, we know we can add an empty else clause.
 * @param p  The parser.
 */
void
bc_parse_endif(BcParse* p);

/// References to the signal message and its length.
extern const char bc_sig_msg[];
extern const uchar bc_sig_msg_len;

/// A reference to an array of bits that are set if the corresponding lex token
/// is valid in an expression.
extern const uint8_t bc_parse_exprs[];

/// A reference to an array of bc operators.
extern const uchar bc_parse_ops[];

// References to the various instances of BcParseNext's.

/// A reference to what tokens are valid as next tokens when parsing normal
/// expressions. More accurately, these are the tokens that are valid for
/// *ending* the expression.
extern const BcParseNext bc_parse_next_expr;

/// A reference to what tokens are valid as next tokens when parsing function
/// parameters (well, actually arguments).
extern const BcParseNext bc_parse_next_arg;

/// A reference to what tokens are valid as next tokens when parsing a print
/// statement.
extern const BcParseNext bc_parse_next_print;

/// A reference to what tokens are valid as next tokens when parsing things like
/// loop headers and builtin functions where the only thing expected is a right
/// paren.
///
/// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
/// include/parse.h). It refers to how POSIX only allows some operators as part
/// of the conditional of for loops, while loops, and if statements.
extern const BcParseNext bc_parse_next_rel;

/// A reference to what tokens are valid as next tokens when parsing an array
/// element expression.
extern const BcParseNext bc_parse_next_elem;

/// A reference to what tokens are valid as next tokens when parsing the first
/// two parts of a for loop header.
extern const BcParseNext bc_parse_next_for;

/// A reference to what tokens are valid as next tokens when parsing a read
/// expression.
extern const BcParseNext bc_parse_next_read;

/// A reference to what tokens are valid as next tokens when parsing a builtin
/// function with multiple arguments.
extern const BcParseNext bc_parse_next_builtin;

#else // BC_ENABLED

// If bc is not enabled, execution is always possible because dc has strict
// rules that ensure execution can always proceed safely.
#define BC_PARSE_NO_EXEC(p) (0)

#endif // BC_ENABLED

#endif // BC_BC_H