1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2024 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for bc's lexer. 33 * 34 */ 35 36 #ifndef BC_LEX_H 37 #define BC_LEX_H 38 39 #include <stdbool.h> 40 #include <stddef.h> 41 42 #include <status.h> 43 #include <vector.h> 44 #include <lang.h> 45 46 /** 47 * A convenience macro for throwing errors in lex code. This takes care of 48 * plumbing like passing in the current line the lexer is on. 
49 * @param l The lexer. 50 * @param e The error. 51 */ 52 #if BC_DEBUG 53 #define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line)) 54 #else // BC_DEBUG 55 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) 56 #endif // BC_DEBUG 57 58 /** 59 * A convenience macro for throwing errors in lex code. This takes care of 60 * plumbing like passing in the current line the lexer is on. 61 * @param l The lexer. 62 * @param e The error. 63 */ 64 #if BC_DEBUG 65 #define bc_lex_verr(l, e, ...) \ 66 (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__)) 67 #else // BC_DEBUG 68 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) 69 #endif // BC_DEBUG 70 71 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the 72 // current calculator. 73 // 74 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid 75 // char for numbers. In bc and dc, capital letters are part of numbers, to a 76 // point. (dc only goes up to hex, so its last valid char is 'F'.) 77 #if BC_ENABLED 78 79 #if DC_ENABLED 80 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') 81 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') 82 #else // DC_ENABLED 83 #define BC_LEX_NEG_CHAR ('-') 84 #define BC_LEX_LAST_NUM_CHAR ('Z') 85 #endif // DC_ENABLED 86 87 #else // BC_ENABLED 88 89 #define BC_LEX_NEG_CHAR ('_') 90 #define BC_LEX_LAST_NUM_CHAR ('F') 91 92 #endif // BC_ENABLED 93 94 /** 95 * Returns true if c is a valid number character. 96 * @param c The char to check. 97 * @param pt If a decimal point has already been seen. 98 * @param int_only True if the number is expected to be an int only, false if 99 * non-integers are allowed. 100 * @return True if @a c is a valid number character. 101 */ 102 #define BC_LEX_NUM_CHAR(c, pt, int_only) \ 103 (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ 104 ((c) == '.' && !(pt) && !(int_only))) 105 106 /// An enum of lex token types. 
typedef enum BcLexType
{
	/// End of file.
	BC_LEX_EOF,

	/// Marker for invalid tokens, used by bc and dc for const data.
	BC_LEX_INVALID,

#if BC_ENABLED

	/// Increment operator.
	BC_LEX_OP_INC,

	/// Decrement operator.
	BC_LEX_OP_DEC,

#endif // BC_ENABLED

	/// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
	/// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
	/// able to distinguish them.
	BC_LEX_NEG,

	/// Boolean not.
	BC_LEX_OP_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH

	/// Truncation operator.
	BC_LEX_OP_TRUNC,

#endif // BC_ENABLE_EXTRA_MATH

	/// Power operator.
	BC_LEX_OP_POWER,

	/// Multiplication operator.
	BC_LEX_OP_MULTIPLY,

	/// Division operator.
	BC_LEX_OP_DIVIDE,

	/// Modulus operator.
	BC_LEX_OP_MODULUS,

	/// Addition operator.
	BC_LEX_OP_PLUS,

	/// Subtraction operator.
	BC_LEX_OP_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) operator.
	BC_LEX_OP_PLACES,

	/// Left (decimal) shift operator.
	BC_LEX_OP_LSHIFT,

	/// Right (decimal) shift operator.
	BC_LEX_OP_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH

	/// Equal operator.
	BC_LEX_OP_REL_EQ,

	/// Less than or equal operator.
	BC_LEX_OP_REL_LE,

	/// Greater than or equal operator.
	BC_LEX_OP_REL_GE,

	/// Not equal operator.
	BC_LEX_OP_REL_NE,

	/// Less than operator.
	BC_LEX_OP_REL_LT,

	/// Greater than operator.
	BC_LEX_OP_REL_GT,

	/// Boolean or operator.
	BC_LEX_OP_BOOL_OR,

	/// Boolean and operator.
	BC_LEX_OP_BOOL_AND,

#if BC_ENABLED

	/// Power assignment operator.
	BC_LEX_OP_ASSIGN_POWER,

	/// Multiplication assignment operator.
	BC_LEX_OP_ASSIGN_MULTIPLY,

	/// Division assignment operator.
	BC_LEX_OP_ASSIGN_DIVIDE,

	/// Modulus assignment operator.
	BC_LEX_OP_ASSIGN_MODULUS,

	/// Addition assignment operator.
	BC_LEX_OP_ASSIGN_PLUS,

	/// Subtraction assignment operator.
	BC_LEX_OP_ASSIGN_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) assignment operator.
	BC_LEX_OP_ASSIGN_PLACES,

	/// Left (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_LSHIFT,

	/// Right (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

	/// Assignment operator.
	BC_LEX_OP_ASSIGN,

	/// Newline.
	BC_LEX_NLINE,

	/// Whitespace.
	BC_LEX_WHITESPACE,

	/// Left parenthesis.
	BC_LEX_LPAREN,

	/// Right parenthesis.
	BC_LEX_RPAREN,

	/// Left bracket.
	BC_LEX_LBRACKET,

	/// Comma.
	BC_LEX_COMMA,

	/// Right bracket.
	BC_LEX_RBRACKET,

	/// Left brace.
	BC_LEX_LBRACE,

	/// Semicolon.
	BC_LEX_SCOLON,

	/// Right brace.
	BC_LEX_RBRACE,

	/// String.
	BC_LEX_STR,

	/// Identifier/name.
	BC_LEX_NAME,

	/// Constant number.
	BC_LEX_NUMBER,

	// These keywords are in the order they are in for a reason. Don't change
	// the order unless you want a bunch of weird failures in the test suite.
	// In fact, almost all of these tokens are in a specific order for a reason.

#if BC_ENABLED

	/// bc auto keyword.
	BC_LEX_KW_AUTO,

	/// bc break keyword.
	BC_LEX_KW_BREAK,

	/// bc continue keyword.
	BC_LEX_KW_CONTINUE,

	/// bc define keyword.
	BC_LEX_KW_DEFINE,

	/// bc for keyword.
	BC_LEX_KW_FOR,

	/// bc if keyword.
	BC_LEX_KW_IF,

	/// bc limits keyword.
	BC_LEX_KW_LIMITS,

	/// bc return keyword.
	BC_LEX_KW_RETURN,

	/// bc while keyword.
	BC_LEX_KW_WHILE,

	/// bc halt keyword.
	BC_LEX_KW_HALT,

	/// bc last keyword.
	BC_LEX_KW_LAST,

#endif // BC_ENABLED

	/// bc ibase keyword.
	BC_LEX_KW_IBASE,

	/// bc obase keyword.
	BC_LEX_KW_OBASE,

	/// bc scale keyword.
	BC_LEX_KW_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc seed keyword.
	BC_LEX_KW_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc length keyword.
	BC_LEX_KW_LENGTH,

	/// bc print keyword.
	BC_LEX_KW_PRINT,

	/// bc sqrt keyword.
	BC_LEX_KW_SQRT,

	/// bc abs keyword.
	BC_LEX_KW_ABS,

	/// bc is_number keyword.
	BC_LEX_KW_IS_NUMBER,

	/// bc is_string keyword.
	BC_LEX_KW_IS_STRING,

#if BC_ENABLE_EXTRA_MATH

	/// bc irand keyword.
	BC_LEX_KW_IRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc asciify keyword.
	BC_LEX_KW_ASCIIFY,

	/// bc modexp keyword.
	BC_LEX_KW_MODEXP,

	/// bc divmod keyword.
	BC_LEX_KW_DIVMOD,

	/// bc quit keyword.
	BC_LEX_KW_QUIT,

	/// bc read keyword.
	BC_LEX_KW_READ,

#if BC_ENABLE_EXTRA_MATH

	/// bc rand keyword.
	BC_LEX_KW_RAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc maxibase keyword.
	BC_LEX_KW_MAXIBASE,

	/// bc maxobase keyword.
	BC_LEX_KW_MAXOBASE,

	/// bc maxscale keyword.
	BC_LEX_KW_MAXSCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc maxrand keyword.
	BC_LEX_KW_MAXRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc line_length keyword.
	BC_LEX_KW_LINE_LENGTH,

#if BC_ENABLED

	/// bc global_stacks keyword.
	BC_LEX_KW_GLOBAL_STACKS,

#endif // BC_ENABLED

	/// bc leading_zero keyword.
	BC_LEX_KW_LEADING_ZERO,

	/// bc stream keyword.
	BC_LEX_KW_STREAM,

	/// bc else keyword.
	BC_LEX_KW_ELSE,

#if DC_ENABLED

	/// dc extended registers keyword.
	BC_LEX_EXTENDED_REGISTERS,

	/// A special token for dc to calculate equal without a register.
	BC_LEX_EQ_NO_REG,

	/// Colon (array) operator.
	BC_LEX_COLON,

	/// Execute command.
	BC_LEX_EXECUTE,

	/// Print stack command.
	BC_LEX_PRINT_STACK,

	/// Clear stack command.
	BC_LEX_CLEAR_STACK,

	/// Register stack level command.
	BC_LEX_REG_STACK_LEVEL,

	/// Main stack level command.
	BC_LEX_STACK_LEVEL,

	/// Duplicate command.
	BC_LEX_DUPLICATE,

	/// Swap (reverse) command.
	BC_LEX_SWAP,

	/// Pop (remove) command.
	BC_LEX_POP,

	/// Store ibase command.
	BC_LEX_STORE_IBASE,

	/// Store obase command.
	BC_LEX_STORE_OBASE,

	/// Store scale command.
	BC_LEX_STORE_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// Store seed command.
	BC_LEX_STORE_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// Load variable onto stack command.
	BC_LEX_LOAD,

	/// Pop off of variable stack onto results stack command.
	BC_LEX_LOAD_POP,

	/// Push onto variable stack command.
	BC_LEX_STORE_PUSH,

	/// Print with pop command.
	BC_LEX_PRINT_POP,

	/// Parameterized quit command.
	BC_LEX_NQUIT,

	/// Execution stack depth command.
	BC_LEX_EXEC_STACK_LENGTH,

	/// Scale of number command. This is needed specifically for dc because bc
	/// parses the scale function in parts.
	BC_LEX_SCALE_FACTOR,

	/// Array length command. This is needed specifically for dc because bc
	/// just reuses its length keyword.
	BC_LEX_ARRAY_LENGTH,

#endif // DC_ENABLED

} BcLexType;

// Forward declaration so that BcLexNext can refer to the lexer before the
// struct itself is defined.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex* l);

/// The lexer.
typedef struct BcLex
{
	/// A pointer to the text to lex.
	const char* buf;

	/// The current index into buf.
	size_t i;

	/// The current line.
	size_t line;

	/// The length of buf.
	size_t len;

	/// The current token.
	BcLexType t;

	/// The previous token.
	BcLexType last;

	/// A string to store extra data for tokens. For example, the @a BC_LEX_STR
	/// token really needs to store the actual string, and numbers also need the
	/// string.
	BcVec str;

	/// The mode the lexer is in.
	BcMode mode;

} BcLex;

/**
 * Initializes a lexer.
 * @param l  The lexer to initialize.
 */
void
bc_lex_init(BcLex* l);

/**
 * Frees a lexer. This is not guarded by #if BC_DEBUG because a separate
 * parser is created at runtime to parse read() expressions and dc strings, and
 * that parser needs a lexer.
 * @param l  The lexer to free.
 */
void
bc_lex_free(BcLex* l);

/**
 * Sets the filename that the lexer will be lexing.
 * @param l     The lexer.
 * @param file  The filename that the lexer will lex.
 */
void
bc_lex_file(BcLex* l, const char* file);

/**
 * Sets the text the lexer will lex.
 * @param l     The lexer.
 * @param text  The text to lex.
 * @param mode  The mode to lex in.
 */
void
bc_lex_text(BcLex* l, const char* text, BcMode mode);

/**
 * Generic next function for the parser to call. It takes care of calling the
 * correct @a BcLexNext function and consuming whitespace.
 * @param l  The lexer.
 */
void
bc_lex_next(BcLex* l);

/**
 * Lexes a line comment (one beginning with '#' and going to a newline).
 * @param l  The lexer.
 */
void
bc_lex_lineComment(BcLex* l);

/**
 * Lexes a general comment (C-style comment).
 * @param l  The lexer.
 */
void
bc_lex_comment(BcLex* l);

/**
 * Lexes whitespace, finding as much as possible.
 * @param l  The lexer.
 */
void
bc_lex_whitespace(BcLex* l);

/**
 * Lexes a number that begins with char @a start. This takes care of parsing
 * numbers in scientific and engineering notations.
 * @param l      The lexer.
 * @param start  The starting char of the number. To detect a number and call
 *               this function, the lexer had to eat the first char. It fixes
 *               that by passing it in.
 */
void
bc_lex_number(BcLex* l, char start);

/**
 * Lexes a name/identifier.
 * @param l  The lexer.
 */
void
bc_lex_name(BcLex* l);

/**
 * Lexes common whitespace characters.
 * NOTE(review): the function name suggests this handles tokens shared by bc
 * and dc rather than only whitespace; confirm against the definition.
 * @param l  The lexer.
 * @param c  The character to lex.
 */
void
bc_lex_commonTokens(BcLex* l, char c);

/**
 * Throws a parse error because char @a c was invalid.
 * @param l  The lexer.
 * @param c  The problem character.
 */
void
bc_lex_invalidChar(BcLex* l, char c);

/**
 * Reads a line from stdin and puts it into the lexer's buffer.
 * @param l  The lexer.
 * @return   NOTE(review): the meaning of the bool result is not documented in
 *           this header; presumably it reports whether a line was read.
 *           Confirm against the definition.
 */
bool
bc_lex_readLine(BcLex* l);

#endif // BC_LEX_H