/* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Definitions for bc's lexer. * */ #ifndef BC_LEX_H #define BC_LEX_H #include #include #include #include #include // Two convencience macros for throwing errors in lex code. They take care of // plumbing like passing in the current line the lexer is on. #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the // current calculator. // // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid // char for numbers. In bc and dc, capital letters are part of numbers, to a // point. (dc only goes up to hex, so its last valid char is 'F'.) #if BC_ENABLED #if DC_ENABLED #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') #else // DC_ENABLED #define BC_LEX_NEG_CHAR ('-') #define BC_LEX_LAST_NUM_CHAR ('Z') #endif // DC_ENABLED #else // BC_ENABLED #define BC_LEX_NEG_CHAR ('_') #define BC_LEX_LAST_NUM_CHAR ('F') #endif // BC_ENABLED /** * Returns true if c is a valid number character. * @param c The char to check. * @param pt If a decimal point has already been seen. * @param int_only True if the number is expected to be an int only, false if * non-integers are allowed. * @return True if @a c is a valid number character. */ #define BC_LEX_NUM_CHAR(c, pt, int_only) \ (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ ((c) == '.' && !(pt) && !(int_only))) /// An enum of lex token types. typedef enum BcLexType { /// End of file. BC_LEX_EOF, /// Marker for invalid tokens, used by bc and dc for const data. BC_LEX_INVALID, #if BC_ENABLED /// Increment operator. BC_LEX_OP_INC, /// Decrement operator. BC_LEX_OP_DEC, #endif // BC_ENABLED /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be /// able to distinguish them. BC_LEX_NEG, /// Boolean not. BC_LEX_OP_BOOL_NOT, #if BC_ENABLE_EXTRA_MATH /// Truncation operator. BC_LEX_OP_TRUNC, #endif // BC_ENABLE_EXTRA_MATH /// Power operator. BC_LEX_OP_POWER, /// Multiplication operator. BC_LEX_OP_MULTIPLY, /// Division operator. BC_LEX_OP_DIVIDE, /// Modulus operator. BC_LEX_OP_MODULUS, /// Addition operator. BC_LEX_OP_PLUS, /// Subtraction operator. BC_LEX_OP_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places (truncate or extend) operator. BC_LEX_OP_PLACES, /// Left (decimal) shift operator. BC_LEX_OP_LSHIFT, /// Right (decimal) shift operator. BC_LEX_OP_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH /// Equal operator. BC_LEX_OP_REL_EQ, /// Less than or equal operator. BC_LEX_OP_REL_LE, /// Greater than or equal operator. BC_LEX_OP_REL_GE, /// Not equal operator. BC_LEX_OP_REL_NE, /// Less than operator. BC_LEX_OP_REL_LT, /// Greater than operator. BC_LEX_OP_REL_GT, /// Boolean or operator. BC_LEX_OP_BOOL_OR, /// Boolean and operator. BC_LEX_OP_BOOL_AND, #if BC_ENABLED /// Power assignment operator. BC_LEX_OP_ASSIGN_POWER, /// Multiplication assignment operator. BC_LEX_OP_ASSIGN_MULTIPLY, /// Division assignment operator. BC_LEX_OP_ASSIGN_DIVIDE, /// Modulus assignment operator. BC_LEX_OP_ASSIGN_MODULUS, /// Addition assignment operator. BC_LEX_OP_ASSIGN_PLUS, /// Subtraction assignment operator. BC_LEX_OP_ASSIGN_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places (truncate or extend) assignment operator. BC_LEX_OP_ASSIGN_PLACES, /// Left (decimal) shift assignment operator. BC_LEX_OP_ASSIGN_LSHIFT, /// Right (decimal) shift assignment operator. BC_LEX_OP_ASSIGN_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH #endif // BC_ENABLED /// Assignment operator. BC_LEX_OP_ASSIGN, /// Newline. BC_LEX_NLINE, /// Whitespace. BC_LEX_WHITESPACE, /// Left parenthesis. BC_LEX_LPAREN, /// Right parenthesis. BC_LEX_RPAREN, /// Left bracket. BC_LEX_LBRACKET, /// Comma. BC_LEX_COMMA, /// Right bracket. BC_LEX_RBRACKET, /// Left brace. BC_LEX_LBRACE, /// Semicolon. BC_LEX_SCOLON, /// Right brace. BC_LEX_RBRACE, /// String. BC_LEX_STR, /// Identifier/name. BC_LEX_NAME, /// Constant number. BC_LEX_NUMBER, // These keywords are in the order they are in for a reason. Don't change // the order unless you want a bunch of weird failures in the test suite. // In fact, almost all of these tokens are in a specific order for a reason. #if BC_ENABLED /// bc auto keyword. BC_LEX_KW_AUTO, /// bc break keyword. BC_LEX_KW_BREAK, /// bc continue keyword. BC_LEX_KW_CONTINUE, /// bc define keyword. BC_LEX_KW_DEFINE, /// bc for keyword. BC_LEX_KW_FOR, /// bc if keyword. BC_LEX_KW_IF, /// bc limits keyword. BC_LEX_KW_LIMITS, /// bc return keyword. BC_LEX_KW_RETURN, /// bc while keyword. BC_LEX_KW_WHILE, /// bc halt keyword. BC_LEX_KW_HALT, /// bc last keyword. BC_LEX_KW_LAST, #endif // BC_ENABLED /// bc ibase keyword. BC_LEX_KW_IBASE, /// bc obase keyword. BC_LEX_KW_OBASE, /// bc scale keyword. BC_LEX_KW_SCALE, #if BC_ENABLE_EXTRA_MATH /// bc seed keyword. BC_LEX_KW_SEED, #endif // BC_ENABLE_EXTRA_MATH /// bc length keyword. BC_LEX_KW_LENGTH, /// bc print keyword. BC_LEX_KW_PRINT, /// bc sqrt keyword. BC_LEX_KW_SQRT, /// bc abs keyword. BC_LEX_KW_ABS, #if BC_ENABLE_EXTRA_MATH /// bc irand keyword. BC_LEX_KW_IRAND, #endif // BC_ENABLE_EXTRA_MATH /// bc asciffy keyword. BC_LEX_KW_ASCIIFY, /// bc modexp keyword. BC_LEX_KW_MODEXP, /// bc divmod keyword. BC_LEX_KW_DIVMOD, /// bc quit keyword. BC_LEX_KW_QUIT, /// bc read keyword. BC_LEX_KW_READ, #if BC_ENABLE_EXTRA_MATH /// bc rand keyword. BC_LEX_KW_RAND, #endif // BC_ENABLE_EXTRA_MATH /// bc maxibase keyword. BC_LEX_KW_MAXIBASE, /// bc maxobase keyword. BC_LEX_KW_MAXOBASE, /// bc maxscale keyword. BC_LEX_KW_MAXSCALE, #if BC_ENABLE_EXTRA_MATH /// bc maxrand keyword. BC_LEX_KW_MAXRAND, #endif // BC_ENABLE_EXTRA_MATH /// bc line_length keyword. BC_LEX_KW_LINE_LENGTH, #if BC_ENABLED /// bc global_stacks keyword. BC_LEX_KW_GLOBAL_STACKS, #endif // BC_ENABLED /// bc leading_zero keyword. BC_LEX_KW_LEADING_ZERO, /// bc stream keyword. BC_LEX_KW_STREAM, /// bc else keyword. BC_LEX_KW_ELSE, #if DC_ENABLED /// A special token for dc to calculate equal without a register. BC_LEX_EQ_NO_REG, /// Colon (array) operator. BC_LEX_COLON, /// Execute command. BC_LEX_EXECUTE, /// Print stack command. BC_LEX_PRINT_STACK, /// Clear stack command. BC_LEX_CLEAR_STACK, /// Register stack level command. BC_LEX_REG_STACK_LEVEL, /// Main stack level command. BC_LEX_STACK_LEVEL, /// Duplicate command. BC_LEX_DUPLICATE, /// Swap (reverse) command. BC_LEX_SWAP, /// Pop (remove) command. BC_LEX_POP, /// Store ibase command. BC_LEX_STORE_IBASE, /// Store obase command. BC_LEX_STORE_OBASE, /// Store scale command. BC_LEX_STORE_SCALE, #if BC_ENABLE_EXTRA_MATH /// Store seed command. BC_LEX_STORE_SEED, #endif // BC_ENABLE_EXTRA_MATH /// Load variable onto stack command. BC_LEX_LOAD, /// Pop off of variable stack onto results stack command. BC_LEX_LOAD_POP, /// Push onto variable stack command. BC_LEX_STORE_PUSH, /// Print with pop command. BC_LEX_PRINT_POP, /// Parameterized quit command. BC_LEX_NQUIT, /// Execution stack depth command. BC_LEX_EXEC_STACK_LENGTH, /// Scale of number command. This is needed specifically for dc because bc /// parses the scale function in parts. BC_LEX_SCALE_FACTOR, /// Array length command. This is needed specifically for dc because bc /// just reuses its length keyword. BC_LEX_ARRAY_LENGTH, #endif // DC_ENABLED } BcLexType; struct BcLex; /** * A function pointer to call when another token is needed. Mostly called by the * parser. * @param l The lexer. */ typedef void (*BcLexNext)(struct BcLex* l); /// The lexer. typedef struct BcLex { /// A pointer to the text to lex. const char* buf; /// The current index into buf. size_t i; /// The current line. size_t line; /// The length of buf. size_t len; /// The current token. BcLexType t; /// The previous token. BcLexType last; /// A string to store extra data for tokens. For example, the @a BC_LEX_STR /// token really needs to store the actual string, and numbers also need the /// string. BcVec str; /// If this is true, the lexer is processing stdin and can ask for more data /// if a string or comment are not properly terminated. bool is_stdin; /// If this is true, the lexer is processing expressions from the /// command-line and can ask for more data if a string or comment are not /// properly terminated. bool is_exprs; } BcLex; /** * Initializes a lexer. * @param l The lexer to initialize. */ void bc_lex_init(BcLex* l); /** * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate * parser is created at runtime to parse read() expressions and dc strings, and * that parser needs a lexer. * @param l The lexer to free. */ void bc_lex_free(BcLex* l); /** * Sets the filename that the lexer will be lexing. * @param l The lexer. * @param file The filename that the lexer will lex. */ void bc_lex_file(BcLex* l, const char* file); /** * Sets the text the lexer will lex. * @param l The lexer. * @param text The text to lex. * @param is_stdin True if the text is from stdin, false otherwise. * @param is_exprs True if the text is from command-line expressions, false * otherwise. */ void bc_lex_text(BcLex* l, const char* text, bool is_stdin, bool is_exprs); /** * Generic next function for the parser to call. It takes care of calling the * correct @a BcLexNext function and consuming whitespace. * @param l The lexer. */ void bc_lex_next(BcLex* l); /** * Lexes a line comment (one beginning with '#' and going to a newline). * @param l The lexer. */ void bc_lex_lineComment(BcLex* l); /** * Lexes a general comment (C-style comment). * @param l The lexer. */ void bc_lex_comment(BcLex* l); /** * Lexes whitespace, finding as much as possible. * @param l The lexer. */ void bc_lex_whitespace(BcLex* l); /** * Lexes a number that begins with char @a start. This takes care of parsing * numbers in scientific and engineering notations. * @param l The lexer. * @param start The starting char of the number. To detect a number and call * this function, the lexer had to eat the first char. It fixes * that by passing it in. */ void bc_lex_number(BcLex* l, char start); /** * Lexes a name/identifier. * @param l The lexer. */ void bc_lex_name(BcLex* l); /** * Lexes common whitespace characters. * @param l The lexer. * @param c The character to lex. */ void bc_lex_commonTokens(BcLex* l, char c); /** * Throws a parse error because char @a c was invalid. * @param l The lexer. * @param c The problem character. */ void bc_lex_invalidChar(BcLex* l, char c); /** * Reads a line from stdin and puts it into the lexer's buffer. * @param l The lexer. */ bool bc_lex_readLine(BcLex* l); #endif // BC_LEX_H