1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Definitions for bc's lexer. 33 * 34 */ 35 36 #ifndef BC_LEX_H 37 #define BC_LEX_H 38 39 #include <stdbool.h> 40 #include <stddef.h> 41 42 #include <status.h> 43 #include <vector.h> 44 #include <lang.h> 45 46 // Two convencience macros for throwing errors in lex code. They take care of 47 // plumbing like passing in the current line the lexer is on. 48 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) 49 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) 50 51 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the 52 // current calculator. 53 // 54 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid 55 // char for numbers. In bc and dc, capital letters are part of numbers, to a 56 // point. (dc only goes up to hex, so its last valid char is 'F'.) 57 #if BC_ENABLED 58 59 #if DC_ENABLED 60 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') 61 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') 62 #else // DC_ENABLED 63 #define BC_LEX_NEG_CHAR ('-') 64 #define BC_LEX_LAST_NUM_CHAR ('Z') 65 #endif // DC_ENABLED 66 67 #else // BC_ENABLED 68 69 #define BC_LEX_NEG_CHAR ('_') 70 #define BC_LEX_LAST_NUM_CHAR ('F') 71 72 #endif // BC_ENABLED 73 74 /** 75 * Returns true if c is a valid number character. 76 * @param c The char to check. 77 * @param pt If a decimal point has already been seen. 78 * @param int_only True if the number is expected to be an int only, false if 79 * non-integers are allowed. 80 * @return True if @a c is a valid number character. 81 */ 82 #define BC_LEX_NUM_CHAR(c, pt, int_only) \ 83 (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ 84 ((c) == '.' && !(pt) && !(int_only))) 85 86 /// An enum of lex token types. 87 typedef enum BcLexType { 88 89 /// End of file. 90 BC_LEX_EOF, 91 92 /// Marker for invalid tokens, used by bc and dc for const data. 93 BC_LEX_INVALID, 94 95 #if BC_ENABLED 96 97 /// Increment operator. 98 BC_LEX_OP_INC, 99 100 /// Decrement operator. 101 BC_LEX_OP_DEC, 102 103 #endif // BC_ENABLED 104 105 /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer 106 /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be 107 /// able to distinguish them. 108 BC_LEX_NEG, 109 110 /// Boolean not. 111 BC_LEX_OP_BOOL_NOT, 112 113 #if BC_ENABLE_EXTRA_MATH 114 115 /// Truncation operator. 116 BC_LEX_OP_TRUNC, 117 118 #endif // BC_ENABLE_EXTRA_MATH 119 120 /// Power operator. 121 BC_LEX_OP_POWER, 122 123 /// Multiplication operator. 124 BC_LEX_OP_MULTIPLY, 125 126 /// Division operator. 127 BC_LEX_OP_DIVIDE, 128 129 /// Modulus operator. 130 BC_LEX_OP_MODULUS, 131 132 /// Addition operator. 133 BC_LEX_OP_PLUS, 134 135 /// Subtraction operator. 136 BC_LEX_OP_MINUS, 137 138 #if BC_ENABLE_EXTRA_MATH 139 /// Places (truncate or extend) operator. 140 BC_LEX_OP_PLACES, 141 142 /// Left (decimal) shift operator. 143 BC_LEX_OP_LSHIFT, 144 145 /// Right (decimal) shift operator. 146 BC_LEX_OP_RSHIFT, 147 #endif // BC_ENABLE_EXTRA_MATH 148 149 /// Equal operator. 150 BC_LEX_OP_REL_EQ, 151 152 /// Less than or equal operator. 153 BC_LEX_OP_REL_LE, 154 155 /// Greater than or equal operator. 156 BC_LEX_OP_REL_GE, 157 158 /// Not equal operator. 159 BC_LEX_OP_REL_NE, 160 161 /// Less than operator. 162 BC_LEX_OP_REL_LT, 163 164 /// Greater than operator. 165 BC_LEX_OP_REL_GT, 166 167 /// Boolean or operator. 168 BC_LEX_OP_BOOL_OR, 169 170 /// Boolean and operator. 171 BC_LEX_OP_BOOL_AND, 172 173 #if BC_ENABLED 174 /// Power assignment operator. 175 BC_LEX_OP_ASSIGN_POWER, 176 177 /// Multiplication assignment operator. 178 BC_LEX_OP_ASSIGN_MULTIPLY, 179 180 /// Division assignment operator. 181 BC_LEX_OP_ASSIGN_DIVIDE, 182 183 /// Modulus assignment operator. 184 BC_LEX_OP_ASSIGN_MODULUS, 185 186 /// Addition assignment operator. 187 BC_LEX_OP_ASSIGN_PLUS, 188 189 /// Subtraction assignment operator. 190 BC_LEX_OP_ASSIGN_MINUS, 191 192 #if BC_ENABLE_EXTRA_MATH 193 194 /// Places (truncate or extend) assignment operator. 195 BC_LEX_OP_ASSIGN_PLACES, 196 197 /// Left (decimal) shift assignment operator. 198 BC_LEX_OP_ASSIGN_LSHIFT, 199 200 /// Right (decimal) shift assignment operator. 201 BC_LEX_OP_ASSIGN_RSHIFT, 202 203 #endif // BC_ENABLE_EXTRA_MATH 204 #endif // BC_ENABLED 205 206 /// Assignment operator. 207 BC_LEX_OP_ASSIGN, 208 209 /// Newline. 210 BC_LEX_NLINE, 211 212 /// Whitespace. 213 BC_LEX_WHITESPACE, 214 215 /// Left parenthesis. 216 BC_LEX_LPAREN, 217 218 /// Right parenthesis. 219 BC_LEX_RPAREN, 220 221 /// Left bracket. 222 BC_LEX_LBRACKET, 223 224 /// Comma. 225 BC_LEX_COMMA, 226 227 /// Right bracket. 228 BC_LEX_RBRACKET, 229 230 /// Left brace. 231 BC_LEX_LBRACE, 232 233 /// Semicolon. 234 BC_LEX_SCOLON, 235 236 /// Right brace. 237 BC_LEX_RBRACE, 238 239 /// String. 240 BC_LEX_STR, 241 242 /// Identifier/name. 243 BC_LEX_NAME, 244 245 /// Constant number. 246 BC_LEX_NUMBER, 247 248 // These keywords are in the order they are in for a reason. Don't change 249 // the order unless you want a bunch of weird failures in the test suite. 250 // In fact, almost all of these tokens are in a specific order for a reason. 251 252 #if BC_ENABLED 253 254 /// bc auto keyword. 255 BC_LEX_KW_AUTO, 256 257 /// bc break keyword. 258 BC_LEX_KW_BREAK, 259 260 /// bc continue keyword. 261 BC_LEX_KW_CONTINUE, 262 263 /// bc define keyword. 264 BC_LEX_KW_DEFINE, 265 266 /// bc for keyword. 267 BC_LEX_KW_FOR, 268 269 /// bc if keyword. 270 BC_LEX_KW_IF, 271 272 /// bc limits keyword. 273 BC_LEX_KW_LIMITS, 274 275 /// bc return keyword. 276 BC_LEX_KW_RETURN, 277 278 /// bc while keyword. 279 BC_LEX_KW_WHILE, 280 281 /// bc halt keyword. 282 BC_LEX_KW_HALT, 283 284 /// bc last keyword. 285 BC_LEX_KW_LAST, 286 287 #endif // BC_ENABLED 288 289 /// bc ibase keyword. 290 BC_LEX_KW_IBASE, 291 292 /// bc obase keyword. 293 BC_LEX_KW_OBASE, 294 295 /// bc scale keyword. 296 BC_LEX_KW_SCALE, 297 298 #if BC_ENABLE_EXTRA_MATH 299 300 /// bc seed keyword. 301 BC_LEX_KW_SEED, 302 303 #endif // BC_ENABLE_EXTRA_MATH 304 305 /// bc length keyword. 306 BC_LEX_KW_LENGTH, 307 308 /// bc print keyword. 309 BC_LEX_KW_PRINT, 310 311 /// bc sqrt keyword. 312 BC_LEX_KW_SQRT, 313 314 /// bc abs keyword. 315 BC_LEX_KW_ABS, 316 317 #if BC_ENABLE_EXTRA_MATH 318 319 /// bc irand keyword. 320 BC_LEX_KW_IRAND, 321 322 #endif // BC_ENABLE_EXTRA_MATH 323 324 /// bc asciffy keyword. 325 BC_LEX_KW_ASCIIFY, 326 327 /// bc modexp keyword. 328 BC_LEX_KW_MODEXP, 329 330 /// bc divmod keyword. 331 BC_LEX_KW_DIVMOD, 332 333 /// bc quit keyword. 334 BC_LEX_KW_QUIT, 335 336 /// bc read keyword. 337 BC_LEX_KW_READ, 338 339 #if BC_ENABLE_EXTRA_MATH 340 341 /// bc rand keyword. 342 BC_LEX_KW_RAND, 343 344 #endif // BC_ENABLE_EXTRA_MATH 345 346 /// bc maxibase keyword. 347 BC_LEX_KW_MAXIBASE, 348 349 /// bc maxobase keyword. 350 BC_LEX_KW_MAXOBASE, 351 352 /// bc maxscale keyword. 353 BC_LEX_KW_MAXSCALE, 354 355 #if BC_ENABLE_EXTRA_MATH 356 /// bc maxrand keyword. 357 BC_LEX_KW_MAXRAND, 358 #endif // BC_ENABLE_EXTRA_MATH 359 360 /// bc stream keyword. 361 BC_LEX_KW_STREAM, 362 363 /// bc else keyword. 364 BC_LEX_KW_ELSE, 365 366 #if DC_ENABLED 367 368 /// A special token for dc to calculate equal without a register. 369 BC_LEX_EQ_NO_REG, 370 371 /// Colon (array) operator. 372 BC_LEX_COLON, 373 374 /// Execute command. 375 BC_LEX_EXECUTE, 376 377 /// Print stack command. 378 BC_LEX_PRINT_STACK, 379 380 /// Clear stack command. 381 BC_LEX_CLEAR_STACK, 382 383 /// Register stack level command. 384 BC_LEX_REG_STACK_LEVEL, 385 386 /// Main stack level command. 387 BC_LEX_STACK_LEVEL, 388 389 /// Duplicate command. 390 BC_LEX_DUPLICATE, 391 392 /// Swap (reverse) command. 393 BC_LEX_SWAP, 394 395 /// Pop (remove) command. 396 BC_LEX_POP, 397 398 /// Store ibase command. 399 BC_LEX_STORE_IBASE, 400 401 /// Store obase command. 402 BC_LEX_STORE_OBASE, 403 404 /// Store scale command. 405 BC_LEX_STORE_SCALE, 406 407 #if BC_ENABLE_EXTRA_MATH 408 /// Store seed command. 409 BC_LEX_STORE_SEED, 410 #endif // BC_ENABLE_EXTRA_MATH 411 412 /// Load variable onto stack command. 413 BC_LEX_LOAD, 414 415 /// Pop off of variable stack onto results stack command. 416 BC_LEX_LOAD_POP, 417 418 /// Push onto variable stack command. 419 BC_LEX_STORE_PUSH, 420 421 /// Print with pop command. 422 BC_LEX_PRINT_POP, 423 424 /// Parameterized quit command. 425 BC_LEX_NQUIT, 426 427 /// Execution stack depth command. 428 BC_LEX_EXEC_STACK_LENGTH, 429 430 /// Scale of number command. This is needed specifically for dc because bc 431 /// parses the scale function in parts. 432 BC_LEX_SCALE_FACTOR, 433 434 /// Array length command. This is needed specifically for dc because bc 435 /// just reuses its length keyword. 436 BC_LEX_ARRAY_LENGTH, 437 438 #endif // DC_ENABLED 439 440 } BcLexType; 441 442 struct BcLex; 443 444 /** 445 * A function pointer to call when another token is needed. Mostly called by the 446 * parser. 447 * @param l The lexer. 448 */ 449 typedef void (*BcLexNext)(struct BcLex* l); 450 451 /// The lexer. 452 typedef struct BcLex { 453 454 /// A pointer to the text to lex. 455 const char *buf; 456 457 /// The current index into buf. 458 size_t i; 459 460 /// The current line. 461 size_t line; 462 463 /// The length of buf. 464 size_t len; 465 466 /// The current token. 467 BcLexType t; 468 469 /// The previous token. 470 BcLexType last; 471 472 /// A string to store extra data for tokens. For example, the @a BC_LEX_STR 473 /// token really needs to store the actual string, and numbers also need the 474 /// string. 475 BcVec str; 476 477 /// If this is true, the lexer is processing stdin and can ask for more data 478 /// if a string or comment are not properly terminated. 479 bool is_stdin; 480 481 } BcLex; 482 483 /** 484 * Initializes a lexer. 485 * @param l The lexer to initialize. 486 */ 487 void bc_lex_init(BcLex *l); 488 489 /** 490 * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate 491 * parser is created at runtime to parse read() expressions and dc strings, and 492 * that parser needs a lexer. 493 * @param l The lexer to free. 494 */ 495 void bc_lex_free(BcLex *l); 496 497 /** 498 * Sets the filename that the lexer will be lexing. 499 * @param l The lexer. 500 * @param file The filename that the lexer will lex. 501 */ 502 void bc_lex_file(BcLex *l, const char *file); 503 504 /** 505 * Sets the text the lexer will lex. 506 * @param l The lexer. 507 * @param text The text to lex. 508 * @param is_stdin True if the text is from stdin, false otherwise. 509 */ 510 void bc_lex_text(BcLex *l, const char *text, bool is_stdin); 511 512 /** 513 * Generic next function for the parser to call. It takes care of calling the 514 * correct @a BcLexNext function and consuming whitespace. 515 * @param l The lexer. 516 */ 517 void bc_lex_next(BcLex *l); 518 519 /** 520 * Lexes a line comment (one beginning with '#' and going to a newline). 521 * @param l The lexer. 522 */ 523 void bc_lex_lineComment(BcLex *l); 524 525 /** 526 * Lexes a general comment (C-style comment). 527 * @param l The lexer. 528 */ 529 void bc_lex_comment(BcLex *l); 530 531 /** 532 * Lexes whitespace, finding as much as possible. 533 * @param l The lexer. 534 */ 535 void bc_lex_whitespace(BcLex *l); 536 537 /** 538 * Lexes a number that begins with char @a start. This takes care of parsing 539 * numbers in scientific and engineering notations. 540 * @param l The lexer. 541 * @param start The starting char of the number. To detect a number and call 542 * this function, the lexer had to eat the first char. It fixes 543 * that by passing it in. 544 */ 545 void bc_lex_number(BcLex *l, char start); 546 547 /** 548 * Lexes a name/identifier. 549 * @param l The lexer. 550 */ 551 void bc_lex_name(BcLex *l); 552 553 /** 554 * Lexes common whitespace characters. 555 * @param l The lexer. 556 * @param c The character to lex. 557 */ 558 void bc_lex_commonTokens(BcLex *l, char c); 559 560 /** 561 * Throws a parse error because char @a c was invalid. 562 * @param l The lexer. 563 * @param c The problem character. 564 */ 565 void bc_lex_invalidChar(BcLex *l, char c); 566 567 /** 568 * Reads a line from stdin and puts it into the lexer's buffer. 569 * @param l The lexer. 570 */ 571 bool bc_lex_readLine(BcLex *l); 572 573 #endif // BC_LEX_H 574