1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * The parser for dc. 33 * 34 */ 35 36 #if DC_ENABLED 37 38 #include <assert.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <setjmp.h> 42 43 #include <dc.h> 44 #include <program.h> 45 #include <vm.h> 46 47 /** 48 * Parses a register. The lexer should have already lexed the true name of the 49 * register, per extended registers and such. 50 * @param p The parser. 51 * @param var True if the parser is for a variable, false otherwise. 52 */ 53 static void 54 dc_parse_register(BcParse* p, bool var) 55 { 56 bc_lex_next(&p->l); 57 if (p->l.t != BC_LEX_NAME) bc_parse_err(p, BC_ERR_PARSE_TOKEN); 58 59 bc_parse_pushName(p, p->l.str.v, var); 60 } 61 62 /** 63 * Parses a dc string. 64 * @param p The parser. 65 */ 66 static inline void 67 dc_parse_string(BcParse* p) 68 { 69 bc_parse_addString(p); 70 bc_lex_next(&p->l); 71 } 72 73 /** 74 * Parses a token that requires a memory operation, like load or store. 75 * @param p The parser. 76 * @param inst The instruction to push for the memory operation. 77 * @param name Whether the load or store is to a variable or array, and not to 78 * a global. 79 * @param store True if the operation is a store, false otherwise. 80 */ 81 static void 82 dc_parse_mem(BcParse* p, uchar inst, bool name, bool store) 83 { 84 // Push the instruction. 85 bc_parse_push(p, inst); 86 87 // Parse the register if necessary. 88 if (name) dc_parse_register(p, inst != BC_INST_ARRAY_ELEM); 89 90 // Stores use the bc assign infrastructure, but they need to do a swap 91 // first. 92 if (store) 93 { 94 bc_parse_push(p, BC_INST_SWAP); 95 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL); 96 } 97 98 bc_lex_next(&p->l); 99 } 100 101 /** 102 * Parses a conditional execution instruction. 103 * @param p The parser. 104 * @param inst The instruction for the condition. 105 */ 106 static void 107 dc_parse_cond(BcParse* p, uchar inst) 108 { 109 // Push the instruction for the condition and the conditional execution. 110 bc_parse_push(p, inst); 111 bc_parse_push(p, BC_INST_EXEC_COND); 112 113 // Parse the register. 114 dc_parse_register(p, true); 115 116 bc_lex_next(&p->l); 117 118 // If the next token is an else, parse the else. 119 if (p->l.t == BC_LEX_KW_ELSE) 120 { 121 dc_parse_register(p, true); 122 bc_lex_next(&p->l); 123 } 124 // Otherwise, push a marker for no else. 125 else bc_parse_pushIndex(p, SIZE_MAX); 126 } 127 128 /** 129 * Parses a token for dc. 130 * @param p The parser. 131 * @param t The token to parse. 132 * @param flags The flags that say what is allowed or not. 133 */ 134 static void 135 dc_parse_token(BcParse* p, BcLexType t, uint8_t flags) 136 { 137 uchar inst; 138 bool assign, get_token = false; 139 140 switch (t) 141 { 142 case BC_LEX_OP_REL_EQ: 143 case BC_LEX_OP_REL_LE: 144 case BC_LEX_OP_REL_GE: 145 case BC_LEX_OP_REL_NE: 146 case BC_LEX_OP_REL_LT: 147 case BC_LEX_OP_REL_GT: 148 { 149 inst = (uchar) (t - BC_LEX_OP_REL_EQ + BC_INST_REL_EQ); 150 dc_parse_cond(p, inst); 151 break; 152 } 153 154 case BC_LEX_SCOLON: 155 case BC_LEX_COLON: 156 { 157 dc_parse_mem(p, BC_INST_ARRAY_ELEM, true, t == BC_LEX_COLON); 158 break; 159 } 160 161 case BC_LEX_STR: 162 { 163 dc_parse_string(p); 164 break; 165 } 166 167 case BC_LEX_NEG: 168 { 169 // This tells us whether or not the neg is for a command or at the 170 // beginning of a number. If it's a command, push it. Otherwise, 171 // fallthrough and parse the number. 172 if (dc_lex_negCommand(&p->l)) 173 { 174 bc_parse_push(p, BC_INST_NEG); 175 get_token = true; 176 break; 177 } 178 179 bc_lex_next(&p->l); 180 181 // Fallthrough. 182 BC_FALLTHROUGH 183 } 184 185 case BC_LEX_NUMBER: 186 { 187 bc_parse_number(p); 188 189 // Push the negative instruction if we fell through from above. 190 if (t == BC_LEX_NEG) bc_parse_push(p, BC_INST_NEG); 191 get_token = true; 192 193 break; 194 } 195 196 case BC_LEX_KW_READ: 197 { 198 // Make sure the read is not recursive. 199 if (BC_ERR(flags & BC_PARSE_NOREAD)) 200 { 201 bc_parse_err(p, BC_ERR_EXEC_REC_READ); 202 } 203 else bc_parse_push(p, BC_INST_READ); 204 205 get_token = true; 206 207 break; 208 } 209 210 case BC_LEX_OP_ASSIGN: 211 case BC_LEX_STORE_PUSH: 212 { 213 assign = t == BC_LEX_OP_ASSIGN; 214 inst = assign ? BC_INST_VAR : BC_INST_PUSH_TO_VAR; 215 dc_parse_mem(p, inst, true, assign); 216 break; 217 } 218 219 case BC_LEX_LOAD: 220 case BC_LEX_LOAD_POP: 221 { 222 inst = t == BC_LEX_LOAD_POP ? BC_INST_PUSH_VAR : BC_INST_LOAD; 223 dc_parse_mem(p, inst, true, false); 224 break; 225 } 226 227 case BC_LEX_REG_STACK_LEVEL: 228 { 229 dc_parse_mem(p, BC_INST_REG_STACK_LEN, true, false); 230 break; 231 } 232 233 case BC_LEX_STORE_IBASE: 234 case BC_LEX_STORE_OBASE: 235 case BC_LEX_STORE_SCALE: 236 #if BC_ENABLE_EXTRA_MATH 237 case BC_LEX_STORE_SEED: 238 #endif // BC_ENABLE_EXTRA_MATH 239 { 240 inst = (uchar) (t - BC_LEX_STORE_IBASE + BC_INST_IBASE); 241 dc_parse_mem(p, inst, false, true); 242 break; 243 } 244 245 case BC_LEX_ARRAY_LENGTH: 246 { 247 // Need to push the array first, based on how length is implemented. 248 bc_parse_push(p, BC_INST_ARRAY); 249 dc_parse_register(p, false); 250 251 bc_parse_push(p, BC_INST_LENGTH); 252 253 get_token = true; 254 255 break; 256 } 257 258 case BC_LEX_EOF: 259 case BC_LEX_INVALID: 260 #if BC_ENABLED 261 case BC_LEX_OP_INC: 262 case BC_LEX_OP_DEC: 263 #endif // BC_ENABLED 264 case BC_LEX_OP_BOOL_NOT: 265 #if BC_ENABLE_EXTRA_MATH 266 case BC_LEX_OP_TRUNC: 267 #endif // BC_ENABLE_EXTRA_MATH 268 case BC_LEX_OP_POWER: 269 case BC_LEX_OP_MULTIPLY: 270 case BC_LEX_OP_DIVIDE: 271 case BC_LEX_OP_MODULUS: 272 case BC_LEX_OP_PLUS: 273 case BC_LEX_OP_MINUS: 274 #if BC_ENABLE_EXTRA_MATH 275 case BC_LEX_OP_PLACES: 276 case BC_LEX_OP_LSHIFT: 277 case BC_LEX_OP_RSHIFT: 278 #endif // BC_ENABLE_EXTRA_MATH 279 case BC_LEX_OP_BOOL_OR: 280 case BC_LEX_OP_BOOL_AND: 281 #if BC_ENABLED 282 case BC_LEX_OP_ASSIGN_POWER: 283 case BC_LEX_OP_ASSIGN_MULTIPLY: 284 case BC_LEX_OP_ASSIGN_DIVIDE: 285 case BC_LEX_OP_ASSIGN_MODULUS: 286 case BC_LEX_OP_ASSIGN_PLUS: 287 case BC_LEX_OP_ASSIGN_MINUS: 288 #if BC_ENABLE_EXTRA_MATH 289 case BC_LEX_OP_ASSIGN_PLACES: 290 case BC_LEX_OP_ASSIGN_LSHIFT: 291 case BC_LEX_OP_ASSIGN_RSHIFT: 292 #endif // BC_ENABLE_EXTRA_MATH 293 #endif // BC_ENABLED 294 case BC_LEX_NLINE: 295 case BC_LEX_WHITESPACE: 296 case BC_LEX_LPAREN: 297 case BC_LEX_RPAREN: 298 case BC_LEX_LBRACKET: 299 case BC_LEX_COMMA: 300 case BC_LEX_RBRACKET: 301 case BC_LEX_LBRACE: 302 case BC_LEX_NAME: 303 case BC_LEX_RBRACE: 304 #if BC_ENABLED 305 case BC_LEX_KW_AUTO: 306 case BC_LEX_KW_BREAK: 307 case BC_LEX_KW_CONTINUE: 308 case BC_LEX_KW_DEFINE: 309 case BC_LEX_KW_FOR: 310 case BC_LEX_KW_IF: 311 case BC_LEX_KW_LIMITS: 312 case BC_LEX_KW_RETURN: 313 case BC_LEX_KW_WHILE: 314 case BC_LEX_KW_HALT: 315 case BC_LEX_KW_LAST: 316 #endif // BC_ENABLED 317 case BC_LEX_KW_IBASE: 318 case BC_LEX_KW_OBASE: 319 case BC_LEX_KW_SCALE: 320 #if BC_ENABLE_EXTRA_MATH 321 case BC_LEX_KW_SEED: 322 #endif // BC_ENABLE_EXTRA_MATH 323 case BC_LEX_KW_LENGTH: 324 case BC_LEX_KW_PRINT: 325 case BC_LEX_KW_SQRT: 326 case BC_LEX_KW_ABS: 327 case BC_LEX_KW_IS_NUMBER: 328 case BC_LEX_KW_IS_STRING: 329 #if BC_ENABLE_EXTRA_MATH 330 case BC_LEX_KW_IRAND: 331 #endif // BC_ENABLE_EXTRA_MATH 332 case BC_LEX_KW_ASCIIFY: 333 case BC_LEX_KW_MODEXP: 334 case BC_LEX_KW_DIVMOD: 335 case BC_LEX_KW_QUIT: 336 #if BC_ENABLE_EXTRA_MATH 337 case BC_LEX_KW_RAND: 338 #endif // BC_ENABLE_EXTRA_MATH 339 case BC_LEX_KW_MAXIBASE: 340 case BC_LEX_KW_MAXOBASE: 341 case BC_LEX_KW_MAXSCALE: 342 #if BC_ENABLE_EXTRA_MATH 343 case BC_LEX_KW_MAXRAND: 344 #endif // BC_ENABLE_EXTRA_MATH 345 case BC_LEX_KW_LINE_LENGTH: 346 #if BC_ENABLED 347 case BC_LEX_KW_GLOBAL_STACKS: 348 #endif // BC_ENABLED 349 case BC_LEX_KW_LEADING_ZERO: 350 case BC_LEX_KW_STREAM: 351 case BC_LEX_KW_ELSE: 352 case BC_LEX_EQ_NO_REG: 353 case BC_LEX_EXECUTE: 354 case BC_LEX_PRINT_STACK: 355 case BC_LEX_CLEAR_STACK: 356 case BC_LEX_STACK_LEVEL: 357 case BC_LEX_DUPLICATE: 358 case BC_LEX_SWAP: 359 case BC_LEX_POP: 360 case BC_LEX_PRINT_POP: 361 case BC_LEX_NQUIT: 362 case BC_LEX_EXEC_STACK_LENGTH: 363 case BC_LEX_SCALE_FACTOR: 364 { 365 // All other tokens should be taken care of by the caller, or they 366 // actually *are* invalid. 367 bc_parse_err(p, BC_ERR_PARSE_TOKEN); 368 } 369 } 370 371 if (get_token) bc_lex_next(&p->l); 372 } 373 374 void 375 dc_parse_expr(BcParse* p, uint8_t flags) 376 { 377 BcInst inst; 378 BcLexType t; 379 bool need_expr, have_expr = false; 380 381 need_expr = ((flags & BC_PARSE_NOREAD) != 0); 382 383 // dc can just keep parsing forever basically, unlike bc, which has to have 384 // a whole bunch of complicated nonsense because its language was horribly 385 // designed. 386 387 // While we don't have EOF... 388 while ((t = p->l.t) != BC_LEX_EOF) 389 { 390 // Eat newline. 391 if (t == BC_LEX_NLINE) 392 { 393 bc_lex_next(&p->l); 394 continue; 395 } 396 397 // Get the instruction that corresponds to the token. 398 inst = dc_parse_insts[t]; 399 400 // If the instruction is invalid, that means we have to do some harder 401 // parsing. So if not invalid, just push the instruction; otherwise, 402 // parse the token. 403 if (inst != BC_INST_INVALID) 404 { 405 bc_parse_push(p, inst); 406 bc_lex_next(&p->l); 407 } 408 else dc_parse_token(p, t, flags); 409 410 have_expr = true; 411 } 412 413 // If we don't have an expression and need one, barf. Otherwise, just push a 414 // BC_INST_POP_EXEC if we have EOF and BC_PARSE_NOCALL, which dc uses to 415 // indicate that it is executing a string. 416 if (BC_ERR(need_expr && !have_expr)) bc_err(BC_ERR_EXEC_READ_EXPR); 417 else if (p->l.t == BC_LEX_EOF && (flags & BC_PARSE_NOCALL)) 418 { 419 bc_parse_push(p, BC_INST_POP_EXEC); 420 } 421 } 422 423 void 424 dc_parse_parse(BcParse* p) 425 { 426 assert(p != NULL); 427 428 BC_SETJMP_LOCKED(vm, exit); 429 430 // If we have EOF, someone called this function one too many times. 431 // Otherwise, parse. 432 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF); 433 else dc_parse_expr(p, 0); 434 435 exit: 436 437 // Need to reset if there was an error. 438 if (BC_SIG_EXC(vm)) bc_parse_reset(p); 439 440 BC_LONGJMP_CONT(vm); 441 BC_SIG_MAYLOCK; 442 } 443 #endif // DC_ENABLED 444