/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * The lexer for dc.
 *
 */

#if DC_ENABLED

#include <ctype.h>

#include <dc.h>
#include <vm.h>

/**
 * Tests the character at the lexer's current index and reports whether it is
 * NOT a number character according to BC_LEX_NUM_CHAR(c, false, false).
 * NOTE(review): presumably used to tell a negate command apart from the sign
 * of a negative number -- confirm against the callers.
 * @param l  The lexer.
 * @return   True if the current character is not a number character, false
 *           otherwise.
 */
bool dc_lex_negCommand(BcLex *l) {
	char c = l->buf[l->i];
	return !BC_LEX_NUM_CHAR(c, false, false);
}

/**
 * Processes a dc command that needs a register. This is where the
 * extended-register extension is implemented.
 *
 * The character under consideration is l->buf[l->i - 1], i.e., the one that
 * dc_lex_token() consumed just before calling this function.
 * @param l  The lexer.
 */
static void dc_lex_register(BcLex *l) {

	// If extended register is enabled and the character is whitespace...
	// The <ctype.h> functions require a value representable as unsigned char
	// (or EOF); passing a negative plain char is undefined behavior, so cast.
	if (DC_X && isspace((unsigned char) l->buf[l->i - 1])) {

		char c;

		// Eat the whitespace.
		bc_lex_whitespace(l);
		c = l->buf[l->i];

		// Check for a letter or underscore. The cast is needed for the same
		// reason as the one on isspace() above.
		if (BC_ERR(!isalpha((unsigned char) c) && c != '_'))
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);

		// Parse a normal identifier.
		l->i += 1;
		bc_lex_name(l);
	}
	else {

		// I don't allow newlines because newlines are used for controlling when
		// execution happens, and allowing newlines would just be complex.
		if (BC_ERR(l->buf[l->i - 1] == '\n'))
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);

		// Set the lexer string and token. The register name is the single
		// character itself, stored as a NUL-terminated string.
		bc_vec_popAll(&l->str);
		bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
		bc_vec_pushByte(&l->str, '\0');
		l->t = BC_LEX_NAME;
	}
}

/**
 * Parses a dc string. Since dc's strings need to check for balanced brackets,
 * we can't just parse bc and dc strings with different start and end
 * characters. Oh, and dc strings need to check for escaped brackets.
 *
 * On entry, l->i is expected to index the first character after the opening
 * bracket. On success, l->str holds the NUL-terminated string contents, l->i
 * is advanced past the closing bracket, and l->line is incremented by the
 * number of newlines that were consumed.
 * @param l  The lexer.
 */
static void dc_lex_string(BcLex *l) {

	size_t depth, nls, i;
	char c;
	bool got_more;

	// Set the token and clear the string.
	l->t = BC_LEX_STR;
	bc_vec_popAll(&l->str);

	do {

		// Every pass rescans from l->i, so all of the counters start over.
		depth = 1;
		nls = 0;
		got_more = false;

		// When lexing stdin, the lexer must be reading from the vm's buffer.
		assert(!l->is_stdin || l->buf == vm.buffer.v);

		// This is the meat. As long as we don't run into the NUL byte, and we
		// have "depth", which means we haven't completely balanced brackets
		// yet, we continue eating the string.
		for (i = l->i; (c = l->buf[i]) && depth; ++i) {

			// Check for escaped brackets and set the depths as appropriate.
			// An escaped character never changes the depth; the backslash
			// itself is dropped and only the character after it is kept.
			if (c == '\\') {
				c = l->buf[++i];
				if (!c) break;
			}
			else {
				depth += (c == '[');
				depth -= (c == ']');
			}

			// We want to adjust the line in the lexer as necessary.
			nls += (c == '\n');

			// The bracket that brings the depth to zero closes the string and
			// is not part of its contents.
			if (depth) bc_vec_push(&l->str, &c);
		}

		// If we hit the end of the buffer with brackets still open, try to get
		// more input, but only when reading from stdin or from expressions and
		// EOF has not been seen. If more input arrived, throw away the partial
		// string because the next pass rescans from l->i.
		// NOTE(review): presumably bc_lex_readLine() extends the buffer so the
		// rescan sees the whole string -- confirm.
		if (BC_ERR(c == '\0' && depth)) {
			if (!vm.eof && (l->is_stdin || l->is_exprs))
				got_more = bc_lex_readLine(l);
			if (got_more) bc_vec_popAll(&l->str);
		}

	} while (got_more && depth);

	// Obviously, if we didn't balance, that's an error.
	if (BC_ERR(c == '\0' && depth)) {
		l->i = i;
		bc_lex_err(l, BC_ERR_PARSE_STRING);
	}

	bc_vec_pushByte(&l->str, '\0');

	l->i = i;
	l->line += nls;
}

/**
 * Lexes a dc token. This is the dc implementation of BcLexNext.
 *
 * Consumes one character from the buffer (advancing l->i) and, depending on
 * that character and on the previous token (l->last), sets l->t and possibly
 * l->str.
 * @param l  The lexer.
 */
void dc_lex_token(BcLex *l) {

	char c = l->buf[l->i++], c2;
	size_t i;

	BC_SIG_ASSERT_LOCKED;

	// If the last token was a command that needs a register, we need to parse a
	// register, so do so.
	for (i = 0; i < dc_lex_regs_len; ++i) {

		// If the token is a register token, take care of it and return.
		if (l->last == dc_lex_regs[i]) {
			dc_lex_register(l);
			return;
		}
	}

	// These lines are for tokens that easily correspond to one character. We
	// just set the token. The table is indexed by the character's offset from
	// '"', and an entry of BC_LEX_INVALID means the character needs the switch
	// statement below.
	if (c >= '"' && c <= '~' &&
	    (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
	{
		return;
	}

	// This is the workhorse of the lexer when more complicated things are
	// needed.
	switch (c) {

		case '\0':
		case '\n':
		case '\t':
		case '\v':
		case '\f':
		case '\r':
		case ' ':
		{
			bc_lex_commonTokens(l, c);
			break;
		}

		// We don't have the ! command, so we always expect certain things
		// after the exclamation point.
		case '!':
		{
			c2 = l->buf[l->i];

			// Anything other than '=', '<', or '>' is an error, and the error
			// is reported against the '!' itself.
			if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
			else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
			else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
			else bc_lex_invalidChar(l, c);

			// Consume the second character of the operator.
			l->i += 1;

			break;
		}

		case '#':
		{
			bc_lex_lineComment(l);
			break;
		}

		case '.':
		{
			c2 = l->buf[l->i];

			// If the character after is a number character, this dot is part
			// of a number. Otherwise, a lone dot is not valid here, so it is
			// reported as an invalid character.
			if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
				bc_lex_number(l, c);
			else bc_lex_invalidChar(l, c);

			break;
		}

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case 'A':
		case 'B':
		case 'C':
		case 'D':
		case 'E':
		case 'F':
		{
			bc_lex_number(l, c);
			break;
		}

		// 'g' is only valid as the first character of a two-character command.
		case 'g':
		{
			c2 = l->buf[l->i];

			// Unlike the '!' case, the error here is reported against the
			// second character.
			if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
			else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
			else bc_lex_invalidChar(l, c2);

			// Consume the second character of the command.
			l->i += 1;

			break;
		}

		case '[':
		{
			dc_lex_string(l);
			break;
		}

		default:
		{
			bc_lex_invalidChar(l, c);
		}
	}
}
#endif // DC_ENABLED