1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for bc's lexer. 33 * 34 */ 35 36 #ifndef BC_LEX_H 37 #define BC_LEX_H 38 39 #include <stdbool.h> 40 #include <stddef.h> 41 42 #include <status.h> 43 #include <vector.h> 44 #include <lang.h> 45 46 // Two convenience macros for throwing errors in lex code. They take care of 47 // plumbing like passing in the current line the lexer is on. 
48 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) 49 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) 50 51 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the 52 // current calculator. 53 // 54 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid 55 // char for numbers. In bc and dc, capital letters are part of numbers, to a 56 // point. (dc only goes up to hex, so its last valid char is 'F'.) 57 #if BC_ENABLED 58 59 #if DC_ENABLED 60 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') 61 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') 62 #else // DC_ENABLED 63 #define BC_LEX_NEG_CHAR ('-') 64 #define BC_LEX_LAST_NUM_CHAR ('Z') 65 #endif // DC_ENABLED 66 67 #else // BC_ENABLED 68 69 #define BC_LEX_NEG_CHAR ('_') 70 #define BC_LEX_LAST_NUM_CHAR ('F') 71 72 #endif // BC_ENABLED 73 74 /** 75 * Returns true if c is a valid number character. 76 * @param c The char to check. 77 * @param pt If a decimal point has already been seen. 78 * @param int_only True if the number is expected to be an int only, false if 79 * non-integers are allowed. 80 * @return True if @a c is a valid number character. 81 */ 82 #define BC_LEX_NUM_CHAR(c, pt, int_only) \ 83 (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ 84 ((c) == '.' && !(pt) && !(int_only))) 85 86 /// An enum of lex token types. 87 typedef enum BcLexType { 88 89 /// End of file. 90 BC_LEX_EOF, 91 92 /// Marker for invalid tokens, used by bc and dc for const data. 93 BC_LEX_INVALID, 94 95 #if BC_ENABLED 96 97 /// Increment operator. 98 BC_LEX_OP_INC, 99 100 /// Decrement operator. 101 BC_LEX_OP_DEC, 102 103 #endif // BC_ENABLED 104 105 /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer 106 /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be 107 /// able to distinguish them. 108 BC_LEX_NEG, 109 110 /// Boolean not. 
111 BC_LEX_OP_BOOL_NOT, 112 113 #if BC_ENABLE_EXTRA_MATH 114 115 /// Truncation operator. 116 BC_LEX_OP_TRUNC, 117 118 #endif // BC_ENABLE_EXTRA_MATH 119 120 /// Power operator. 121 BC_LEX_OP_POWER, 122 123 /// Multiplication operator. 124 BC_LEX_OP_MULTIPLY, 125 126 /// Division operator. 127 BC_LEX_OP_DIVIDE, 128 129 /// Modulus operator. 130 BC_LEX_OP_MODULUS, 131 132 /// Addition operator. 133 BC_LEX_OP_PLUS, 134 135 /// Subtraction operator. 136 BC_LEX_OP_MINUS, 137 138 #if BC_ENABLE_EXTRA_MATH 139 /// Places (truncate or extend) operator. 140 BC_LEX_OP_PLACES, 141 142 /// Left (decimal) shift operator. 143 BC_LEX_OP_LSHIFT, 144 145 /// Right (decimal) shift operator. 146 BC_LEX_OP_RSHIFT, 147 #endif // BC_ENABLE_EXTRA_MATH 148 149 /// Equal operator. 150 BC_LEX_OP_REL_EQ, 151 152 /// Less than or equal operator. 153 BC_LEX_OP_REL_LE, 154 155 /// Greater than or equal operator. 156 BC_LEX_OP_REL_GE, 157 158 /// Not equal operator. 159 BC_LEX_OP_REL_NE, 160 161 /// Less than operator. 162 BC_LEX_OP_REL_LT, 163 164 /// Greater than operator. 165 BC_LEX_OP_REL_GT, 166 167 /// Boolean or operator. 168 BC_LEX_OP_BOOL_OR, 169 170 /// Boolean and operator. 171 BC_LEX_OP_BOOL_AND, 172 173 #if BC_ENABLED 174 /// Power assignment operator. 175 BC_LEX_OP_ASSIGN_POWER, 176 177 /// Multiplication assignment operator. 178 BC_LEX_OP_ASSIGN_MULTIPLY, 179 180 /// Division assignment operator. 181 BC_LEX_OP_ASSIGN_DIVIDE, 182 183 /// Modulus assignment operator. 184 BC_LEX_OP_ASSIGN_MODULUS, 185 186 /// Addition assignment operator. 187 BC_LEX_OP_ASSIGN_PLUS, 188 189 /// Subtraction assignment operator. 190 BC_LEX_OP_ASSIGN_MINUS, 191 192 #if BC_ENABLE_EXTRA_MATH 193 194 /// Places (truncate or extend) assignment operator. 195 BC_LEX_OP_ASSIGN_PLACES, 196 197 /// Left (decimal) shift assignment operator. 198 BC_LEX_OP_ASSIGN_LSHIFT, 199 200 /// Right (decimal) shift assignment operator. 
201 BC_LEX_OP_ASSIGN_RSHIFT, 202 203 #endif // BC_ENABLE_EXTRA_MATH 204 #endif // BC_ENABLED 205 206 /// Assignment operator. 207 BC_LEX_OP_ASSIGN, 208 209 /// Newline. 210 BC_LEX_NLINE, 211 212 /// Whitespace. 213 BC_LEX_WHITESPACE, 214 215 /// Left parenthesis. 216 BC_LEX_LPAREN, 217 218 /// Right parenthesis. 219 BC_LEX_RPAREN, 220 221 /// Left bracket. 222 BC_LEX_LBRACKET, 223 224 /// Comma. 225 BC_LEX_COMMA, 226 227 /// Right bracket. 228 BC_LEX_RBRACKET, 229 230 /// Left brace. 231 BC_LEX_LBRACE, 232 233 /// Semicolon. 234 BC_LEX_SCOLON, 235 236 /// Right brace. 237 BC_LEX_RBRACE, 238 239 /// String. 240 BC_LEX_STR, 241 242 /// Identifier/name. 243 BC_LEX_NAME, 244 245 /// Constant number. 246 BC_LEX_NUMBER, 247 248 // These keywords are in the order they are in for a reason. Don't change 249 // the order unless you want a bunch of weird failures in the test suite. 250 // In fact, almost all of these tokens are in a specific order for a reason. 251 252 #if BC_ENABLED 253 254 /// bc auto keyword. 255 BC_LEX_KW_AUTO, 256 257 /// bc break keyword. 258 BC_LEX_KW_BREAK, 259 260 /// bc continue keyword. 261 BC_LEX_KW_CONTINUE, 262 263 /// bc define keyword. 264 BC_LEX_KW_DEFINE, 265 266 /// bc for keyword. 267 BC_LEX_KW_FOR, 268 269 /// bc if keyword. 270 BC_LEX_KW_IF, 271 272 /// bc limits keyword. 273 BC_LEX_KW_LIMITS, 274 275 /// bc return keyword. 276 BC_LEX_KW_RETURN, 277 278 /// bc while keyword. 279 BC_LEX_KW_WHILE, 280 281 /// bc halt keyword. 282 BC_LEX_KW_HALT, 283 284 /// bc last keyword. 285 BC_LEX_KW_LAST, 286 287 #endif // BC_ENABLED 288 289 /// bc ibase keyword. 290 BC_LEX_KW_IBASE, 291 292 /// bc obase keyword. 293 BC_LEX_KW_OBASE, 294 295 /// bc scale keyword. 296 BC_LEX_KW_SCALE, 297 298 #if BC_ENABLE_EXTRA_MATH 299 300 /// bc seed keyword. 301 BC_LEX_KW_SEED, 302 303 #endif // BC_ENABLE_EXTRA_MATH 304 305 /// bc length keyword. 306 BC_LEX_KW_LENGTH, 307 308 /// bc print keyword. 309 BC_LEX_KW_PRINT, 310 311 /// bc sqrt keyword. 
312 BC_LEX_KW_SQRT, 313 314 /// bc abs keyword. 315 BC_LEX_KW_ABS, 316 317 #if BC_ENABLE_EXTRA_MATH 318 319 /// bc irand keyword. 320 BC_LEX_KW_IRAND, 321 322 #endif // BC_ENABLE_EXTRA_MATH 323 324 /// bc asciffy keyword. 325 BC_LEX_KW_ASCIIFY, 326 327 /// bc modexp keyword. 328 BC_LEX_KW_MODEXP, 329 330 /// bc divmod keyword. 331 BC_LEX_KW_DIVMOD, 332 333 /// bc quit keyword. 334 BC_LEX_KW_QUIT, 335 336 /// bc read keyword. 337 BC_LEX_KW_READ, 338 339 #if BC_ENABLE_EXTRA_MATH 340 341 /// bc rand keyword. 342 BC_LEX_KW_RAND, 343 344 #endif // BC_ENABLE_EXTRA_MATH 345 346 /// bc maxibase keyword. 347 BC_LEX_KW_MAXIBASE, 348 349 /// bc maxobase keyword. 350 BC_LEX_KW_MAXOBASE, 351 352 /// bc maxscale keyword. 353 BC_LEX_KW_MAXSCALE, 354 355 #if BC_ENABLE_EXTRA_MATH 356 /// bc maxrand keyword. 357 BC_LEX_KW_MAXRAND, 358 #endif // BC_ENABLE_EXTRA_MATH 359 360 /// bc line_length keyword. 361 BC_LEX_KW_LINE_LENGTH, 362 363 #if BC_ENABLED 364 365 /// bc global_stacks keyword. 366 BC_LEX_KW_GLOBAL_STACKS, 367 368 #endif // BC_ENABLED 369 370 /// bc leading_zero keyword. 371 BC_LEX_KW_LEADING_ZERO, 372 373 /// bc stream keyword. 374 BC_LEX_KW_STREAM, 375 376 /// bc else keyword. 377 BC_LEX_KW_ELSE, 378 379 #if DC_ENABLED 380 381 /// A special token for dc to calculate equal without a register. 382 BC_LEX_EQ_NO_REG, 383 384 /// Colon (array) operator. 385 BC_LEX_COLON, 386 387 /// Execute command. 388 BC_LEX_EXECUTE, 389 390 /// Print stack command. 391 BC_LEX_PRINT_STACK, 392 393 /// Clear stack command. 394 BC_LEX_CLEAR_STACK, 395 396 /// Register stack level command. 397 BC_LEX_REG_STACK_LEVEL, 398 399 /// Main stack level command. 400 BC_LEX_STACK_LEVEL, 401 402 /// Duplicate command. 403 BC_LEX_DUPLICATE, 404 405 /// Swap (reverse) command. 406 BC_LEX_SWAP, 407 408 /// Pop (remove) command. 409 BC_LEX_POP, 410 411 /// Store ibase command. 412 BC_LEX_STORE_IBASE, 413 414 /// Store obase command. 415 BC_LEX_STORE_OBASE, 416 417 /// Store scale command. 
418 BC_LEX_STORE_SCALE, 419 420 #if BC_ENABLE_EXTRA_MATH 421 /// Store seed command. 422 BC_LEX_STORE_SEED, 423 #endif // BC_ENABLE_EXTRA_MATH 424 425 /// Load variable onto stack command. 426 BC_LEX_LOAD, 427 428 /// Pop off of variable stack onto results stack command. 429 BC_LEX_LOAD_POP, 430 431 /// Push onto variable stack command. 432 BC_LEX_STORE_PUSH, 433 434 /// Print with pop command. 435 BC_LEX_PRINT_POP, 436 437 /// Parameterized quit command. 438 BC_LEX_NQUIT, 439 440 /// Execution stack depth command. 441 BC_LEX_EXEC_STACK_LENGTH, 442 443 /// Scale of number command. This is needed specifically for dc because bc 444 /// parses the scale function in parts. 445 BC_LEX_SCALE_FACTOR, 446 447 /// Array length command. This is needed specifically for dc because bc 448 /// just reuses its length keyword. 449 BC_LEX_ARRAY_LENGTH, 450 451 #endif // DC_ENABLED 452 453 } BcLexType; 454 455 struct BcLex; 456 457 /** 458 * A function pointer to call when another token is needed. Mostly called by the 459 * parser. 460 * @param l The lexer. 461 */ 462 typedef void (*BcLexNext)(struct BcLex* l); 463 464 /// The lexer. 465 typedef struct BcLex { 466 467 /// A pointer to the text to lex. 468 const char *buf; 469 470 /// The current index into buf. 471 size_t i; 472 473 /// The current line. 474 size_t line; 475 476 /// The length of buf. 477 size_t len; 478 479 /// The current token. 480 BcLexType t; 481 482 /// The previous token. 483 BcLexType last; 484 485 /// A string to store extra data for tokens. For example, the @a BC_LEX_STR 486 /// token really needs to store the actual string, and numbers also need the 487 /// string. 488 BcVec str; 489 490 /// If this is true, the lexer is processing stdin and can ask for more data 491 /// if a string or comment are not properly terminated. 
492 bool is_stdin; 493 494 /// If this is true, the lexer is processing expressions from the 495 /// command-line and can ask for more data if a string or comment are not 496 /// properly terminated. 497 bool is_exprs; 498 499 } BcLex; 500 501 /** 502 * Initializes a lexer. 503 * @param l The lexer to initialize. 504 */ 505 void bc_lex_init(BcLex *l); 506 507 /** 508 * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate 509 * parser is created at runtime to parse read() expressions and dc strings, and 510 * that parser needs a lexer. 511 * @param l The lexer to free. 512 */ 513 void bc_lex_free(BcLex *l); 514 515 /** 516 * Sets the filename that the lexer will be lexing. 517 * @param l The lexer. 518 * @param file The filename that the lexer will lex. 519 */ 520 void bc_lex_file(BcLex *l, const char *file); 521 522 /** 523 * Sets the text the lexer will lex. 524 * @param l The lexer. 525 * @param text The text to lex. 526 * @param is_stdin True if the text is from stdin, false otherwise. 527 * @param is_exprs True if the text is from command-line expressions, false 528 * otherwise. 529 */ 530 void bc_lex_text(BcLex *l, const char *text, bool is_stdin, bool is_exprs); 531 532 /** 533 * Generic next function for the parser to call. It takes care of calling the 534 * correct @a BcLexNext function and consuming whitespace. 535 * @param l The lexer. 536 */ 537 void bc_lex_next(BcLex *l); 538 539 /** 540 * Lexes a line comment (one beginning with '#' and going to a newline). 541 * @param l The lexer. 542 */ 543 void bc_lex_lineComment(BcLex *l); 544 545 /** 546 * Lexes a general comment (C-style comment). 547 * @param l The lexer. 548 */ 549 void bc_lex_comment(BcLex *l); 550 551 /** 552 * Lexes whitespace, finding as much as possible. 553 * @param l The lexer. 554 */ 555 void bc_lex_whitespace(BcLex *l); 556 557 /** 558 * Lexes a number that begins with char @a start. This takes care of parsing 559 * numbers in scientific and engineering notations. 
560 * @param l The lexer. 561 * @param start The starting char of the number. To detect a number and call 562 * this function, the lexer had to eat the first char. It fixes 563 * that by passing it in. 564 */ 565 void bc_lex_number(BcLex *l, char start); 566 567 /** 568 * Lexes a name/identifier. 569 * @param l The lexer. 570 */ 571 void bc_lex_name(BcLex *l); 572 573 /** 574 * Lexes common whitespace characters. 575 * @param l The lexer. 576 * @param c The character to lex. 577 */ 578 void bc_lex_commonTokens(BcLex *l, char c); 579 580 /** 581 * Throws a parse error because char @a c was invalid. 582 * @param l The lexer. 583 * @param c The problem character. 584 */ 585 void bc_lex_invalidChar(BcLex *l, char c); 586 587 /** 588 * Reads a line from stdin and puts it into the lexer's buffer. 589 * @param l The lexer. 590 */ 591 bool bc_lex_readLine(BcLex *l); 592 593 #endif // BC_LEX_H 594