1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2020 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Common code for the lexers. 33 * 34 */ 35 36 #include <assert.h> 37 #include <ctype.h> 38 #include <stdbool.h> 39 #include <string.h> 40 41 #include <status.h> 42 #include <lex.h> 43 #include <vm.h> 44 #include <bc.h> 45 46 void bc_lex_invalidChar(BcLex *l, char c) { 47 l->t = BC_LEX_INVALID; 48 bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c); 49 } 50 51 void bc_lex_lineComment(BcLex *l) { 52 l->t = BC_LEX_WHITESPACE; 53 while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1; 54 } 55 56 void bc_lex_comment(BcLex *l) { 57 58 size_t i, nlines = 0; 59 const char *buf = l->buf; 60 bool end = false; 61 char c; 62 63 l->i += 1; 64 l->t = BC_LEX_WHITESPACE; 65 66 for (i = l->i; !end; i += !end) { 67 68 for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n'); 69 70 if (BC_ERR(!c || buf[i + 1] == '\0')) { 71 l->i = i; 72 bc_lex_err(l, BC_ERROR_PARSE_COMMENT); 73 } 74 75 end = buf[i + 1] == '/'; 76 } 77 78 l->i = i + 2; 79 l->line += nlines; 80 } 81 82 void bc_lex_whitespace(BcLex *l) { 83 char c; 84 l->t = BC_LEX_WHITESPACE; 85 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]); 86 } 87 88 void bc_lex_commonTokens(BcLex *l, char c) { 89 if (!c) l->t = BC_LEX_EOF; 90 else if (c == '\n') l->t = BC_LEX_NLINE; 91 else bc_lex_whitespace(l); 92 } 93 94 static size_t bc_lex_num(BcLex *l, char start, bool int_only) { 95 96 const char *buf = l->buf + l->i; 97 size_t i; 98 char c; 99 bool last_pt, pt = (start == '.'); 100 101 for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) || 102 (c == '\\' && buf[i + 1] == '\n')); ++i) 103 { 104 if (c == '\\') { 105 106 if (buf[i + 1] == '\n') { 107 108 i += 2; 109 110 // Make sure to eat whitespace at the beginning of the line. 111 while(isspace(buf[i]) && buf[i] != '\n') i += 1; 112 113 c = buf[i]; 114 115 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break; 116 } 117 else break; 118 } 119 120 last_pt = (c == '.'); 121 if (pt && last_pt) break; 122 pt = pt || last_pt; 123 124 bc_vec_push(&l->str, &c); 125 } 126 127 return i; 128 } 129 130 void bc_lex_number(BcLex *l, char start) { 131 132 l->t = BC_LEX_NUMBER; 133 134 bc_vec_npop(&l->str, l->str.len); 135 bc_vec_push(&l->str, &start); 136 137 l->i += bc_lex_num(l, start, false); 138 139 #if BC_ENABLE_EXTRA_MATH 140 { 141 char c = l->buf[l->i]; 142 143 if (c == 'e') { 144 145 #if BC_ENABLED 146 if (BC_IS_POSIX) bc_lex_err(l, BC_ERROR_POSIX_EXP_NUM); 147 #endif // BC_ENABLED 148 149 bc_vec_push(&l->str, &c); 150 l->i += 1; 151 c = l->buf[l->i]; 152 153 if (c == BC_LEX_NEG_CHAR) { 154 bc_vec_push(&l->str, &c); 155 l->i += 1; 156 c = l->buf[l->i]; 157 } 158 159 if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true))) 160 bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c); 161 162 l->i += bc_lex_num(l, 0, true); 163 } 164 } 165 #endif // BC_ENABLE_EXTRA_MATH 166 167 bc_vec_pushByte(&l->str, '\0'); 168 } 169 170 void bc_lex_name(BcLex *l) { 171 172 size_t i = 0; 173 const char *buf = l->buf + l->i - 1; 174 char c = buf[i]; 175 176 l->t = BC_LEX_NAME; 177 178 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i]; 179 180 bc_vec_string(&l->str, i, buf); 181 182 // Increment the index. We minus 1 because it has already been incremented. 183 l->i += i - 1; 184 } 185 186 void bc_lex_init(BcLex *l) { 187 BC_SIG_ASSERT_LOCKED; 188 assert(l != NULL); 189 bc_vec_init(&l->str, sizeof(char), NULL); 190 } 191 192 void bc_lex_free(BcLex *l) { 193 BC_SIG_ASSERT_LOCKED; 194 assert(l != NULL); 195 bc_vec_free(&l->str); 196 } 197 198 void bc_lex_file(BcLex *l, const char *file) { 199 assert(l != NULL && file != NULL); 200 l->line = 1; 201 vm.file = file; 202 } 203 204 void bc_lex_next(BcLex *l) { 205 206 assert(l != NULL); 207 208 l->last = l->t; 209 l->line += (l->i != 0 && l->buf[l->i - 1] == '\n'); 210 211 if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERROR_PARSE_EOF); 212 213 l->t = BC_LEX_EOF; 214 215 if (l->i == l->len) return; 216 217 // Loop until failure or we don't have whitespace. This 218 // is so the parser doesn't get inundated with whitespace. 219 do { 220 vm.next(l); 221 } while (l->t == BC_LEX_WHITESPACE); 222 } 223 224 void bc_lex_text(BcLex *l, const char *text) { 225 assert(l != NULL && text != NULL); 226 l->buf = text; 227 l->i = 0; 228 l->len = strlen(text); 229 l->t = l->last = BC_LEX_INVALID; 230 bc_lex_next(l); 231 } 232