/* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2024 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * The lexer for dc. * */ #if DC_ENABLED #include #include #include bool dc_lex_negCommand(BcLex* l) { char c = l->buf[l->i]; return !BC_LEX_NUM_CHAR(c, false, false); } /** * Processes a dc command that needs a register. This is where the * extended-register extension is implemented. * @param l The lexer. */ static void dc_lex_register(BcLex* l) { // If extended register is enabled and the character is whitespace... if (DC_X && isspace(l->buf[l->i - 1])) { char c; // Eat the whitespace. bc_lex_whitespace(l); c = l->buf[l->i]; // Check for a letter or underscore. if (BC_ERR(!isalpha(c) && c != '_')) { bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); } // Parse a normal identifier. l->i += 1; bc_lex_name(l); } else { // I don't allow newlines because newlines are used for controlling when // execution happens, and allowing newlines would just be complex. if (BC_ERR(l->buf[l->i - 1] == '\n')) { bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]); } // Set the lexer string and token. bc_vec_popAll(&l->str); bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]); bc_vec_pushByte(&l->str, '\0'); l->t = BC_LEX_NAME; } } /** * Parses a dc string. Since dc's strings need to check for balanced brackets, * we can't just parse bc and dc strings with different start and end * characters. Oh, and dc strings need to check for escaped brackets. * @param l The lexer. */ static void dc_lex_string(BcLex* l) { size_t depth, nls, i; char c; bool got_more; // Set the token and clear the string. l->t = BC_LEX_STR; bc_vec_popAll(&l->str); do { depth = 1; nls = 0; got_more = false; #if !BC_ENABLE_OSSFUZZ assert(l->mode != BC_MODE_STDIN || l->buf == vm->buffer.v); #endif // !BC_ENABLE_OSSFUZZ // This is the meat. As long as we don't run into the NUL byte, and we // have "depth", which means we haven't completely balanced brackets // yet, we continue eating the string. for (i = l->i; (c = l->buf[i]) && depth; ++i) { // Check for escaped brackets and set the depths as appropriate. if (c == '\\') { c = l->buf[++i]; if (!c) break; } else { depth += (c == '['); depth -= (c == ']'); } // We want to adjust the line in the lexer as necessary. nls += (c == '\n'); if (depth) bc_vec_push(&l->str, &c); } if (BC_ERR(c == '\0' && depth)) { if (!vm->eof && l->mode != BC_MODE_FILE) { got_more = bc_lex_readLine(l); } if (got_more) { bc_vec_popAll(&l->str); } } } while (got_more && depth); // Obviously, if we didn't balance, that's an error. if (BC_ERR(c == '\0' && depth)) { l->i = i; bc_lex_err(l, BC_ERR_PARSE_STRING); } bc_vec_pushByte(&l->str, '\0'); l->i = i; l->line += nls; } /** * Lexes a dc token. This is the dc implementation of BcLexNext. * @param l The lexer. */ void dc_lex_token(BcLex* l) { char c = l->buf[l->i++], c2; size_t i; BC_SIG_ASSERT_LOCKED; // If the last token was a command that needs a register, we need to parse a // register, so do so. for (i = 0; i < dc_lex_regs_len; ++i) { // If the token is a register token, take care of it and return. if (l->last == dc_lex_regs[i]) { dc_lex_register(l); return; } } // These lines are for tokens that easily correspond to one character. We // just set the token. if (c >= '"' && c <= '~' && (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID) { return; } // This is the workhorse of the lexer when more complicated things are // needed. switch (c) { case '\0': case '\n': case '\t': case '\v': case '\f': case '\r': case ' ': { bc_lex_commonTokens(l, c); break; } // We don't have the ! command, so we always expect certain things // after the exclamation point. case '!': { c2 = l->buf[l->i]; if (c2 == '=') l->t = BC_LEX_OP_REL_NE; else if (c2 == '<') l->t = BC_LEX_OP_REL_LE; else if (c2 == '>') l->t = BC_LEX_OP_REL_GE; else bc_lex_invalidChar(l, c); l->i += 1; break; } case '#': { bc_lex_lineComment(l); break; } case '.': { c2 = l->buf[l->i]; // If the character after is a number, this dot is part of a number. // Otherwise, it's the BSD dot (equivalent to last). if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false))) { bc_lex_number(l, c); } else bc_lex_invalidChar(l, c); break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': { bc_lex_number(l, c); break; } case 'g': { c2 = l->buf[l->i]; if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH; else if (c2 == 'x') l->t = BC_LEX_EXTENDED_REGISTERS; else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO; else bc_lex_invalidChar(l, c2); l->i += 1; break; } case '[': { dc_lex_string(l); break; } default: { bc_lex_invalidChar(l, c); } } } #endif // DC_ENABLED