/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * The lexer for bc.
 *
 */

#if BC_ENABLED

#include <assert.h>
#include <ctype.h>
#include <string.h>

#include <bc.h>
#include <vm.h>

/**
 * Lexes a token that starts with a lowercase letter: either one of the
 * keywords in bc_lex_kws or a plain name.
 *
 * The caller has already consumed the first character, so the text to examine
 * starts at l->buf + l->i - 1. On a keyword match, l->t is set to
 * BC_LEX_KW_AUTO plus the keyword's index in bc_lex_kws and l->i is moved past
 * the keyword. Otherwise bc_lex_name() is called and BC_ERR_POSIX_NAME_LEN is
 * reported when l->str.len - 1 is greater than 1.
 *
 * @param l  The lexer.
 */
static void bc_lex_identifier(BcLex *l) {

	size_t i;
	const char *buf = l->buf + l->i - 1;

	for (i = 0; i < bc_lex_kws_len; ++i) {

		const BcLexKeyword *kw = bc_lex_kws + i;
		size_t n = BC_LEX_KW_LEN(kw);
		char next;

		if (strncmp(buf, kw->name, n) != 0) continue;

		// Only read buf[n] after the prefix matched; a mismatch may have been
		// caused by the terminating NUL, in which case buf[n] could lie
		// beyond the end of the input.
		next = buf[n];

		// A keyword must not be followed by another identifier character,
		// otherwise it is only the prefix of a longer name. The cast is
		// required because passing a negative char (any byte >= 0x80 where
		// char is signed) to a <ctype.h> function is undefined behavior.
		if (isalnum((unsigned char) next) || next == '_') continue;

		l->t = BC_LEX_KW_AUTO + (BcLexType) i;

		// NOTE(review): presumably this diagnoses keywords that are
		// extensions to POSIX bc; confirm against BC_LEX_KW_POSIX().
		if (!BC_LEX_KW_POSIX(kw))
			bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);

		// We minus 1 because the index has already been incremented.
		l->i += n - 1;
		return;
	}

	bc_lex_name(l);

	// NOTE(review): presumably l->str.len includes the terminating NUL, which
	// would make this a check for names longer than one character; confirm.
	if (BC_ERR(l->str.len - 1 > 1))
		bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
}

/**
 * Lexes a string literal. The opening double quote has already been consumed
 * by the caller, so l->i is the index of the first character of the contents.
 *
 * On success l->t is BC_LEX_STR, the contents (without quotes) are stored in
 * l->str, l->i is one past the closing quote, and l->line has been advanced
 * by the number of newlines found inside the string.
 *
 * @param l  The lexer.
 */
static void bc_lex_string(BcLex *l) {

	size_t len, nlines = 0, i = l->i;
	const char *buf = l->buf;
	char c;

	l->t = BC_LEX_STR;

	// Find the closing quote (or the end of the input), counting newlines.
	for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n';

	// Hitting the NUL first means the string was never terminated.
	// NOTE(review): the code below reads as if bc_lex_err() does not return
	// for this error (otherwise l->i would end up past the NUL); confirm.
	if (BC_ERR(c == '\0')) {
		l->i = i;
		bc_lex_err(l, BC_ERR_PARSE_STRING);
	}

	len = i - l->i;
	bc_vec_string(&l->str, len, l->buf + l->i);

	// Skip the closing quote.
	l->i = i + 1;
	l->line += nlines;
}

/**
 * Chooses between two token types depending on whether the next character is
 * an equals sign. If it is, the '=' is consumed and l->t is set to @a with;
 * otherwise nothing is consumed and l->t is set to @a without.
 *
 * @param l        The lexer.
 * @param with     The token type to use when an '=' follows.
 * @param without  The token type to use when no '=' follows.
 */
static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {
	if (l->buf[l->i] == '=') {
		l->i += 1;
		l->t = with;
	}
	else l->t = without;
}

/**
 * Lexes the next token. Consumes the character at l->i and dispatches on it,
 * either setting l->t directly or delegating to a helper that does.
 *
 * @param l  The lexer.
 */
void bc_lex_token(BcLex *l) {

	// c is the character being dispatched on (already consumed); c2 is used
	// by some cases to peek at the character after it.
	char c = l->buf[l->i++], c2;

	// This is the workhorse of the lexer.
	switch (c) {

		case '\0':
		case '\n':
		case '\t':
		case '\v':
		case '\f':
		case '\r':
		case ' ':
		{
			bc_lex_commonTokens(l, c);
			break;
		}

		case '!':
		{
			// "!=" is a relational operator; a lone '!' is boolean not.
			bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);

			if (l->t == BC_LEX_OP_BOOL_NOT)
				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");

			break;
		}

		case '"':
		{
			bc_lex_string(l);
			break;
		}

		case '#':
		{
			bc_lex_err(l, BC_ERR_POSIX_COMMENT);
			bc_lex_lineComment(l);
			break;
		}

		case '%':
		{
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
			break;
		}

		case '&':
		{
			// Only "&&" is accepted; a single '&' is an invalid character.
			c2 = l->buf[l->i];
			if (BC_NO_ERR(c2 == '&')) {

				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");

				l->i += 1;
				l->t = BC_LEX_OP_BOOL_AND;
			}
			else bc_lex_invalidChar(l, c);

			break;
		}
#if BC_ENABLE_EXTRA_MATH
		case '$':
		{
			l->t = BC_LEX_OP_TRUNC;
			break;
		}

		case '@':
		{
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
			break;
		}
#endif // BC_ENABLE_EXTRA_MATH
		case '(':
		case ')':
		{
			// The offset from '(' (0 or 1) selects the token relative to
			// BC_LEX_LPAREN.
			l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
			break;
		}

		case '*':
		{
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
			break;
		}

		case '+':
		{
			// "++" is increment; otherwise "+=" or plain '+'.
			c2 = l->buf[l->i];
			if (c2 == '+') {
				l->i += 1;
				l->t = BC_LEX_OP_INC;
			}
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
			break;
		}

		case ',':
		{
			l->t = BC_LEX_COMMA;
			break;
		}

		case '-':
		{
			// "--" is decrement; otherwise "-=" or plain '-'.
			c2 = l->buf[l->i];
			if (c2 == '-') {
				l->i += 1;
				l->t = BC_LEX_OP_DEC;
			}
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
			break;
		}

		case '.':
		{
			// A dot followed by a number character starts a number. A dot
			// that is not is given the BC_LEX_KW_LAST token type, and
			// BC_ERR_POSIX_DOT is reported for it.
			c2 = l->buf[l->i];
			if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
			else {
				l->t = BC_LEX_KW_LAST;
				bc_lex_err(l, BC_ERR_POSIX_DOT);
			}
			break;
		}

		case '/':
		{
			// A slash followed by an asterisk opens a block comment;
			// otherwise this is "/=" or plain '/'.
			c2 = l->buf[l->i];
			if (c2 == '*') bc_lex_comment(l);
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
			break;
		}

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case 'A':
		case 'B':
		case 'C':
		case 'D':
		case 'E':
		case 'F':
		// Apparently, GNU bc (and maybe others) allows any uppercase letter as
		// a number. When single digits, they act like the ones above. When
		// multi-digit, any letter above the input base is automatically set to
		// the biggest allowable digit in the input base.
		case 'G':
		case 'H':
		case 'I':
		case 'J':
		case 'K':
		case 'L':
		case 'M':
		case 'N':
		case 'O':
		case 'P':
		case 'Q':
		case 'R':
		case 'S':
		case 'T':
		case 'U':
		case 'V':
		case 'W':
		case 'X':
		case 'Y':
		case 'Z':
		{
			bc_lex_number(l, c);
			break;
		}

		case ';':
		{
			l->t = BC_LEX_SCOLON;
			break;
		}

		case '<':
		{
#if BC_ENABLE_EXTRA_MATH
			c2 = l->buf[l->i];

			// "<<" is a left shift, optionally followed by '='.
			if (c2 == '<') {
				l->i += 1;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
			break;
		}

		case '=':
		{
			// "==" is a comparison; a lone '=' is assignment.
			bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
			break;
		}

		case '>':
		{
#if BC_ENABLE_EXTRA_MATH
			c2 = l->buf[l->i];

			// ">>" is a right shift, optionally followed by '='.
			if (c2 == '>') {
				l->i += 1;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
			break;
		}

		case '[':
		case ']':
		{
			// The offset from '[' selects the token relative to
			// BC_LEX_LBRACKET. In ASCII ']' is '[' + 2, so the closing
			// bracket maps to BC_LEX_LBRACKET + 2.
			// NOTE(review): relies on the BcLexType layout; confirm there.
			l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
			break;
		}

		case '\\':
		{
			// A backslash is only valid directly before a newline, and the
			// pair is treated as whitespace.
			if (BC_NO_ERR(l->buf[l->i] == '\n')) {
				l->i += 1;
				l->t = BC_LEX_WHITESPACE;
			}
			else bc_lex_invalidChar(l, c);
			break;
		}

		case '^':
		{
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
			break;
		}

		case 'a':
		case 'b':
		case 'c':
		case 'd':
		case 'e':
		case 'f':
		case 'g':
		case 'h':
		case 'i':
		case 'j':
		case 'k':
		case 'l':
		case 'm':
		case 'n':
		case 'o':
		case 'p':
		case 'q':
		case 'r':
		case 's':
		case 't':
		case 'u':
		case 'v':
		case 'w':
		case 'x':
		case 'y':
		case 'z':
		{
			bc_lex_identifier(l);
			break;
		}

		case '{':
		case '}':
		{
			// The offset from '{' selects the token relative to
			// BC_LEX_LBRACE. In ASCII '}' is '{' + 2, so the closing brace
			// maps to BC_LEX_LBRACE + 2.
			// NOTE(review): relies on the BcLexType layout; confirm there.
			l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
			break;
		}

		case '|':
		{
			// Only "||" is accepted; a single '|' is an invalid character.
			c2 = l->buf[l->i];

			if (BC_NO_ERR(c2 == '|')) {

				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");

				l->i += 1;
				l->t = BC_LEX_OP_BOOL_OR;
			}
			else bc_lex_invalidChar(l, c);

			break;
		}

		default:
		{
			bc_lex_invalidChar(l, c);
			break;
		}
	}
}
#endif // BC_ENABLED