1 /* 2 * ***************************************************************************** 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2018-2020 Gavin D. Howard and contributors. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright notice, this 12 * list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright notice, 15 * this list of conditions and the following disclaimer in the documentation 16 * and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ***************************************************************************** 31 * 32 * Common code for the lexers. 33 * 34 */ 35 36 #include <assert.h> 37 #include <ctype.h> 38 #include <stdbool.h> 39 #include <string.h> 40 41 #include <lex.h> 42 #include <vm.h> 43 #include <bc.h> 44 45 void bc_lex_invalidChar(BcLex *l, char c) { 46 l->t = BC_LEX_INVALID; 47 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); 48 } 49 50 void bc_lex_lineComment(BcLex *l) { 51 l->t = BC_LEX_WHITESPACE; 52 while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1; 53 } 54 55 void bc_lex_comment(BcLex *l) { 56 57 size_t i, nlines = 0; 58 const char *buf = l->buf; 59 bool end = false; 60 char c; 61 62 l->i += 1; 63 l->t = BC_LEX_WHITESPACE; 64 65 for (i = l->i; !end; i += !end) { 66 67 for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n'); 68 69 if (BC_ERR(!c || buf[i + 1] == '\0')) { 70 l->i = i; 71 bc_lex_err(l, BC_ERR_PARSE_COMMENT); 72 } 73 74 end = buf[i + 1] == '/'; 75 } 76 77 l->i = i + 2; 78 l->line += nlines; 79 } 80 81 void bc_lex_whitespace(BcLex *l) { 82 char c; 83 l->t = BC_LEX_WHITESPACE; 84 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]); 85 } 86 87 void bc_lex_commonTokens(BcLex *l, char c) { 88 if (!c) l->t = BC_LEX_EOF; 89 else if (c == '\n') l->t = BC_LEX_NLINE; 90 else bc_lex_whitespace(l); 91 } 92 93 static size_t bc_lex_num(BcLex *l, char start, bool int_only) { 94 95 const char *buf = l->buf + l->i; 96 size_t i; 97 char c; 98 bool last_pt, pt = (start == '.'); 99 100 for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) || 101 (c == '\\' && buf[i + 1] == '\n')); ++i) 102 { 103 if (c == '\\') { 104 105 if (buf[i + 1] == '\n') { 106 107 i += 2; 108 109 // Make sure to eat whitespace at the beginning of the line. 110 while(isspace(buf[i]) && buf[i] != '\n') i += 1; 111 112 c = buf[i]; 113 114 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break; 115 } 116 else break; 117 } 118 119 last_pt = (c == '.'); 120 if (pt && last_pt) break; 121 pt = pt || last_pt; 122 123 bc_vec_push(&l->str, &c); 124 } 125 126 return i; 127 } 128 129 void bc_lex_number(BcLex *l, char start) { 130 131 l->t = BC_LEX_NUMBER; 132 133 bc_vec_npop(&l->str, l->str.len); 134 bc_vec_push(&l->str, &start); 135 136 l->i += bc_lex_num(l, start, false); 137 138 #if BC_ENABLE_EXTRA_MATH 139 { 140 char c = l->buf[l->i]; 141 142 if (c == 'e') { 143 144 #if BC_ENABLED 145 if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM); 146 #endif // BC_ENABLED 147 148 bc_vec_push(&l->str, &c); 149 l->i += 1; 150 c = l->buf[l->i]; 151 152 if (c == BC_LEX_NEG_CHAR) { 153 bc_vec_push(&l->str, &c); 154 l->i += 1; 155 c = l->buf[l->i]; 156 } 157 158 if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true))) 159 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); 160 161 l->i += bc_lex_num(l, 0, true); 162 } 163 } 164 #endif // BC_ENABLE_EXTRA_MATH 165 166 bc_vec_pushByte(&l->str, '\0'); 167 } 168 169 void bc_lex_name(BcLex *l) { 170 171 size_t i = 0; 172 const char *buf = l->buf + l->i - 1; 173 char c = buf[i]; 174 175 l->t = BC_LEX_NAME; 176 177 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i]; 178 179 bc_vec_string(&l->str, i, buf); 180 181 // Increment the index. We minus 1 because it has already been incremented. 182 l->i += i - 1; 183 } 184 185 void bc_lex_init(BcLex *l) { 186 BC_SIG_ASSERT_LOCKED; 187 assert(l != NULL); 188 bc_vec_init(&l->str, sizeof(char), NULL); 189 } 190 191 void bc_lex_free(BcLex *l) { 192 BC_SIG_ASSERT_LOCKED; 193 assert(l != NULL); 194 bc_vec_free(&l->str); 195 } 196 197 void bc_lex_file(BcLex *l, const char *file) { 198 assert(l != NULL && file != NULL); 199 l->line = 1; 200 vm.file = file; 201 } 202 203 void bc_lex_next(BcLex *l) { 204 205 assert(l != NULL); 206 207 l->last = l->t; 208 l->line += (l->i != 0 && l->buf[l->i - 1] == '\n'); 209 210 if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF); 211 212 l->t = BC_LEX_EOF; 213 214 if (l->i == l->len) return; 215 216 // Loop until failure or we don't have whitespace. This 217 // is so the parser doesn't get inundated with whitespace. 218 do { 219 vm.next(l); 220 } while (l->t == BC_LEX_WHITESPACE); 221 } 222 223 void bc_lex_text(BcLex *l, const char *text) { 224 assert(l != NULL && text != NULL); 225 l->buf = text; 226 l->i = 0; 227 l->len = strlen(text); 228 l->t = l->last = BC_LEX_INVALID; 229 bc_lex_next(l); 230 } 231