1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for bc's lexer. 33 * 34 */ 35 36 #ifndef BC_LEX_H 37 #define BC_LEX_H 38 39 #include <stdbool.h> 40 #include <stddef.h> 41 42 #include <status.h> 43 #include <vector.h> 44 #include <lang.h> 45 46 /** 47 * A convenience macro for throwing errors in lex code. This takes care of 48 * plumbing like passing in the current line the lexer is on. 
49 * @param l The lexer. 50 * @param e The error. 51 */ 52 #ifndef NDEBUG 53 #define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line)) 54 #else // NDEBUG 55 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) 56 #endif // NDEBUG 57 58 /** 59 * A convenience macro for throwing errors in lex code. This takes care of 60 * plumbing like passing in the current line the lexer is on. 61 * @param l The lexer. 62 * @param e The error. 63 */ 64 #ifndef NDEBUG 65 #define bc_lex_verr(l, e, ...) \ 66 (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__)) 67 #else // NDEBUG 68 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) 69 #endif // NDEBUG 70 71 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the 72 // current calculator. 73 // 74 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid 75 // char for numbers. In bc and dc, capital letters are part of numbers, to a 76 // point. (dc only goes up to hex, so its last valid char is 'F'.) 77 #if BC_ENABLED 78 79 #if DC_ENABLED 80 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') 81 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') 82 #else // DC_ENABLED 83 #define BC_LEX_NEG_CHAR ('-') 84 #define BC_LEX_LAST_NUM_CHAR ('Z') 85 #endif // DC_ENABLED 86 87 #else // BC_ENABLED 88 89 #define BC_LEX_NEG_CHAR ('_') 90 #define BC_LEX_LAST_NUM_CHAR ('F') 91 92 #endif // BC_ENABLED 93 94 /** 95 * Returns true if c is a valid number character. 96 * @param c The char to check. 97 * @param pt If a decimal point has already been seen. 98 * @param int_only True if the number is expected to be an int only, false if 99 * non-integers are allowed. 100 * @return True if @a c is a valid number character. 101 */ 102 #define BC_LEX_NUM_CHAR(c, pt, int_only) \ 103 (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ 104 ((c) == '.' && !(pt) && !(int_only))) 105 106 /// An enum of lex token types. 
typedef enum BcLexType
{
	// Several groups of enumerators below only exist when BC_ENABLED,
	// DC_ENABLED, or BC_ENABLE_EXTRA_MATH is set, so the numeric value of a
	// given token depends on the build configuration.

	/// End of file.
	BC_LEX_EOF,

	/// Marker for invalid tokens, used by bc and dc for const data.
	BC_LEX_INVALID,

#if BC_ENABLED

	/// Increment operator.
	BC_LEX_OP_INC,

	/// Decrement operator.
	BC_LEX_OP_DEC,

#endif // BC_ENABLED

	/// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
	/// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
	/// able to distinguish them.
	BC_LEX_NEG,

	/// Boolean not.
	BC_LEX_OP_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH

	/// Truncation operator.
	BC_LEX_OP_TRUNC,

#endif // BC_ENABLE_EXTRA_MATH

	/// Power operator.
	BC_LEX_OP_POWER,

	/// Multiplication operator.
	BC_LEX_OP_MULTIPLY,

	/// Division operator.
	BC_LEX_OP_DIVIDE,

	/// Modulus operator.
	BC_LEX_OP_MODULUS,

	/// Addition operator.
	BC_LEX_OP_PLUS,

	/// Subtraction operator.
	BC_LEX_OP_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) operator.
	BC_LEX_OP_PLACES,

	/// Left (decimal) shift operator.
	BC_LEX_OP_LSHIFT,

	/// Right (decimal) shift operator.
	BC_LEX_OP_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH

	/// Equal operator.
	BC_LEX_OP_REL_EQ,

	/// Less than or equal operator.
	BC_LEX_OP_REL_LE,

	/// Greater than or equal operator.
	BC_LEX_OP_REL_GE,

	/// Not equal operator.
	BC_LEX_OP_REL_NE,

	/// Less than operator.
	BC_LEX_OP_REL_LT,

	/// Greater than operator.
	BC_LEX_OP_REL_GT,

	/// Boolean or operator.
	BC_LEX_OP_BOOL_OR,

	/// Boolean and operator.
	BC_LEX_OP_BOOL_AND,

#if BC_ENABLED

	/// Power assignment operator.
	BC_LEX_OP_ASSIGN_POWER,

	/// Multiplication assignment operator.
	BC_LEX_OP_ASSIGN_MULTIPLY,

	/// Division assignment operator.
	BC_LEX_OP_ASSIGN_DIVIDE,

	/// Modulus assignment operator.
	BC_LEX_OP_ASSIGN_MODULUS,

	/// Addition assignment operator.
	BC_LEX_OP_ASSIGN_PLUS,

	/// Subtraction assignment operator.
	BC_LEX_OP_ASSIGN_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) assignment operator.
	BC_LEX_OP_ASSIGN_PLACES,

	/// Left (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_LSHIFT,

	/// Right (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

	/// Assignment operator.
	BC_LEX_OP_ASSIGN,

	/// Newline.
	BC_LEX_NLINE,

	/// Whitespace.
	BC_LEX_WHITESPACE,

	/// Left parenthesis.
	BC_LEX_LPAREN,

	/// Right parenthesis.
	BC_LEX_RPAREN,

	/// Left bracket.
	BC_LEX_LBRACKET,

	/// Comma.
	BC_LEX_COMMA,

	/// Right bracket.
	BC_LEX_RBRACKET,

	/// Left brace.
	BC_LEX_LBRACE,

	/// Semicolon.
	BC_LEX_SCOLON,

	/// Right brace.
	BC_LEX_RBRACE,

	/// String.
	BC_LEX_STR,

	/// Identifier/name.
	BC_LEX_NAME,

	/// Constant number.
	BC_LEX_NUMBER,

	// These keywords are in the order they are in for a reason. Don't change
	// the order unless you want a bunch of weird failures in the test suite.
	// In fact, almost all of these tokens are in a specific order for a reason.

#if BC_ENABLED

	/// bc auto keyword.
	BC_LEX_KW_AUTO,

	/// bc break keyword.
	BC_LEX_KW_BREAK,

	/// bc continue keyword.
	BC_LEX_KW_CONTINUE,

	/// bc define keyword.
	BC_LEX_KW_DEFINE,

	/// bc for keyword.
	BC_LEX_KW_FOR,

	/// bc if keyword.
	BC_LEX_KW_IF,

	/// bc limits keyword.
	BC_LEX_KW_LIMITS,

	/// bc return keyword.
	BC_LEX_KW_RETURN,

	/// bc while keyword.
	BC_LEX_KW_WHILE,

	/// bc halt keyword.
	BC_LEX_KW_HALT,

	/// bc last keyword.
	BC_LEX_KW_LAST,

#endif // BC_ENABLED

	/// bc ibase keyword.
	BC_LEX_KW_IBASE,

	/// bc obase keyword.
	BC_LEX_KW_OBASE,

	/// bc scale keyword.
	BC_LEX_KW_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc seed keyword.
	BC_LEX_KW_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc length keyword.
	BC_LEX_KW_LENGTH,

	/// bc print keyword.
	BC_LEX_KW_PRINT,

	/// bc sqrt keyword.
	BC_LEX_KW_SQRT,

	/// bc abs keyword.
	BC_LEX_KW_ABS,

	/// bc is_number keyword.
	BC_LEX_KW_IS_NUMBER,

	/// bc is_string keyword.
	BC_LEX_KW_IS_STRING,

#if BC_ENABLE_EXTRA_MATH

	/// bc irand keyword.
	BC_LEX_KW_IRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc asciify keyword.
	BC_LEX_KW_ASCIIFY,

	/// bc modexp keyword.
	BC_LEX_KW_MODEXP,

	/// bc divmod keyword.
	BC_LEX_KW_DIVMOD,

	/// bc quit keyword.
	BC_LEX_KW_QUIT,

	/// bc read keyword.
	BC_LEX_KW_READ,

#if BC_ENABLE_EXTRA_MATH

	/// bc rand keyword.
	BC_LEX_KW_RAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc maxibase keyword.
	BC_LEX_KW_MAXIBASE,

	/// bc maxobase keyword.
	BC_LEX_KW_MAXOBASE,

	/// bc maxscale keyword.
	BC_LEX_KW_MAXSCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc maxrand keyword.
	BC_LEX_KW_MAXRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc line_length keyword.
	BC_LEX_KW_LINE_LENGTH,

#if BC_ENABLED

	/// bc global_stacks keyword.
	BC_LEX_KW_GLOBAL_STACKS,

#endif // BC_ENABLED

	/// bc leading_zero keyword.
	BC_LEX_KW_LEADING_ZERO,

	/// bc stream keyword.
	BC_LEX_KW_STREAM,

	/// bc else keyword.
	BC_LEX_KW_ELSE,

#if DC_ENABLED

	/// A special token for dc to calculate equal without a register.
	BC_LEX_EQ_NO_REG,

	/// Colon (array) operator.
	BC_LEX_COLON,

	/// Execute command.
	BC_LEX_EXECUTE,

	/// Print stack command.
	BC_LEX_PRINT_STACK,

	/// Clear stack command.
	BC_LEX_CLEAR_STACK,

	/// Register stack level command.
	BC_LEX_REG_STACK_LEVEL,

	/// Main stack level command.
	BC_LEX_STACK_LEVEL,

	/// Duplicate command.
	BC_LEX_DUPLICATE,

	/// Swap (reverse) command.
	BC_LEX_SWAP,

	/// Pop (remove) command.
	BC_LEX_POP,

	/// Store ibase command.
	BC_LEX_STORE_IBASE,

	/// Store obase command.
	BC_LEX_STORE_OBASE,

	/// Store scale command.
	BC_LEX_STORE_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// Store seed command.
	BC_LEX_STORE_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// Load variable onto stack command.
	BC_LEX_LOAD,

	/// Pop off of variable stack onto results stack command.
	BC_LEX_LOAD_POP,

	/// Push onto variable stack command.
	BC_LEX_STORE_PUSH,

	/// Print with pop command.
	BC_LEX_PRINT_POP,

	/// Parameterized quit command.
	BC_LEX_NQUIT,

	/// Execution stack depth command.
	BC_LEX_EXEC_STACK_LENGTH,

	/// Scale of number command. This is needed specifically for dc because bc
	/// parses the scale function in parts.
	BC_LEX_SCALE_FACTOR,

	/// Array length command. This is needed specifically for dc because bc
	/// just reuses its length keyword.
	BC_LEX_ARRAY_LENGTH,

#endif // DC_ENABLED

} BcLexType;

// Forward declaration so that BcLexNext can refer to the lexer before the
// struct itself is defined.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex* l);

/// The lexer.
typedef struct BcLex
{
	/// A pointer to the text to lex.
	const char* buf;

	/// The current index into buf.
	size_t i;

	/// The current line. This is the value that bc_lex_err() and
	/// bc_lex_verr() pass along when reporting an error.
	size_t line;

	/// The length of buf.
	size_t len;

	/// The current token.
	BcLexType t;

	/// The previous token.
	BcLexType last;

	/// A string to store extra data for tokens. For example, the @a BC_LEX_STR
	/// token really needs to store the actual string, and numbers also need the
	/// string.
	BcVec str;

	/// The mode the lexer is in.
	BcMode mode;

} BcLex;

/**
 * Initializes a lexer.
 * @param l  The lexer to initialize.
 */
void
bc_lex_init(BcLex* l);

/**
 * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate
 * parser is created at runtime to parse read() expressions and dc strings, and
 * that parser needs a lexer.
 * @param l  The lexer to free.
 */
void
bc_lex_free(BcLex* l);

/**
 * Sets the filename that the lexer will be lexing.
 * @param l     The lexer.
 * @param file  The filename that the lexer will lex.
 */
void
bc_lex_file(BcLex* l, const char* file);

/**
 * Sets the text the lexer will lex.
 * @param l     The lexer.
 * @param text  The text to lex.
 * @param mode  The mode to lex in.
 */
void
bc_lex_text(BcLex* l, const char* text, BcMode mode);

/**
 * Generic next function for the parser to call. It takes care of calling the
 * correct @a BcLexNext function and consuming whitespace.
 * @param l  The lexer.
 */
void
bc_lex_next(BcLex* l);

/**
 * Lexes a line comment (one beginning with '#' and going to a newline).
 * @param l  The lexer.
 */
void
bc_lex_lineComment(BcLex* l);

/**
 * Lexes a general comment (C-style comment).
 * @param l  The lexer.
 */
void
bc_lex_comment(BcLex* l);

/**
 * Lexes whitespace, finding as much as possible.
 * @param l  The lexer.
 */
void
bc_lex_whitespace(BcLex* l);

/**
 * Lexes a number that begins with char @a start. This takes care of parsing
 * numbers in scientific and engineering notations.
 * @param l      The lexer.
 * @param start  The starting char of the number. To detect a number and call
 *               this function, the lexer had to eat the first char. It fixes
 *               that by passing it in.
 */
void
bc_lex_number(BcLex* l, char start);

/**
 * Lexes a name/identifier.
 * @param l  The lexer.
 */
void
bc_lex_name(BcLex* l);

/**
 * Lexes common whitespace characters.
 * @param l  The lexer.
 * @param c  The character to lex.
 */
void
bc_lex_commonTokens(BcLex* l, char c);

/**
 * Throws a parse error because char @a c was invalid.
 * @param l  The lexer.
 * @param c  The problem character.
 */
void
bc_lex_invalidChar(BcLex* l, char c);

/**
 * Reads a line from stdin and puts it into the lexer's buffer.
 * @param l  The lexer.
 * @return   A bool whose meaning is not stated in this header; presumably
 *           whether a line was successfully read. TODO: confirm against the
 *           definition.
 */
bool
bc_lex_readLine(BcLex* l);

#endif // BC_LEX_H