xref: /freebsd/contrib/bc/src/lex.c (revision 69c5fa5cd1ec9b09ed88a086607a8a0993818db9)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Common code for the lexers.
33  *
34  */
35 
36 #include <assert.h>
37 #include <ctype.h>
38 #include <stdbool.h>
39 #include <string.h>
40 
41 #include <lex.h>
42 #include <vm.h>
43 #include <bc.h>
44 
45 void bc_lex_invalidChar(BcLex *l, char c) {
46 	l->t = BC_LEX_INVALID;
47 	bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
48 }
49 
50 void bc_lex_lineComment(BcLex *l) {
51 	l->t = BC_LEX_WHITESPACE;
52 	while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
53 }
54 
55 void bc_lex_comment(BcLex *l) {
56 
57 	size_t i, nlines = 0;
58 	const char *buf = l->buf;
59 	bool end = false;
60 	char c;
61 
62 	l->i += 1;
63 	l->t = BC_LEX_WHITESPACE;
64 
65 	for (i = l->i; !end; i += !end) {
66 
67 		for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
68 
69 		if (BC_ERR(!c || buf[i + 1] == '\0')) {
70 			l->i = i;
71 			bc_lex_err(l, BC_ERR_PARSE_COMMENT);
72 		}
73 
74 		end = buf[i + 1] == '/';
75 	}
76 
77 	l->i = i + 2;
78 	l->line += nlines;
79 }
80 
81 void bc_lex_whitespace(BcLex *l) {
82 	char c;
83 	l->t = BC_LEX_WHITESPACE;
84 	for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
85 }
86 
87 void bc_lex_commonTokens(BcLex *l, char c) {
88 	if (!c) l->t = BC_LEX_EOF;
89 	else if (c == '\n') l->t = BC_LEX_NLINE;
90 	else bc_lex_whitespace(l);
91 }
92 
93 static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
94 
95 	const char *buf = l->buf + l->i;
96 	size_t i;
97 	char c;
98 	bool last_pt, pt = (start == '.');
99 
100 	for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
101 	                             (c == '\\' && buf[i + 1] == '\n')); ++i)
102 	{
103 		if (c == '\\') {
104 
105 			if (buf[i + 1] == '\n') {
106 
107 				i += 2;
108 
109 				// Make sure to eat whitespace at the beginning of the line.
110 				while(isspace(buf[i]) && buf[i] != '\n') i += 1;
111 
112 				c = buf[i];
113 
114 				if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
115 			}
116 			else break;
117 		}
118 
119 		last_pt = (c == '.');
120 		if (pt && last_pt) break;
121 		pt = pt || last_pt;
122 
123 		bc_vec_push(&l->str, &c);
124 	}
125 
126 	return i;
127 }
128 
129 void bc_lex_number(BcLex *l, char start) {
130 
131 	l->t = BC_LEX_NUMBER;
132 
133 	bc_vec_popAll(&l->str);
134 	bc_vec_push(&l->str, &start);
135 
136 	l->i += bc_lex_num(l, start, false);
137 
138 #if BC_ENABLE_EXTRA_MATH
139 	{
140 		char c = l->buf[l->i];
141 
142 		if (c == 'e') {
143 
144 #if BC_ENABLED
145 			if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
146 #endif // BC_ENABLED
147 
148 			bc_vec_push(&l->str, &c);
149 			l->i += 1;
150 			c = l->buf[l->i];
151 
152 			if (c == BC_LEX_NEG_CHAR) {
153 				bc_vec_push(&l->str, &c);
154 				l->i += 1;
155 				c = l->buf[l->i];
156 			}
157 
158 			if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
159 				bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
160 
161 			l->i += bc_lex_num(l, 0, true);
162 		}
163 	}
164 #endif // BC_ENABLE_EXTRA_MATH
165 
166 	bc_vec_pushByte(&l->str, '\0');
167 }
168 
169 void bc_lex_name(BcLex *l) {
170 
171 	size_t i = 0;
172 	const char *buf = l->buf + l->i - 1;
173 	char c = buf[i];
174 
175 	l->t = BC_LEX_NAME;
176 
177 	while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
178 
179 	bc_vec_string(&l->str, i, buf);
180 
181 	// Increment the index. We minus 1 because it has already been incremented.
182 	l->i += i - 1;
183 }
184 
185 void bc_lex_init(BcLex *l) {
186 	BC_SIG_ASSERT_LOCKED;
187 	assert(l != NULL);
188 	bc_vec_init(&l->str, sizeof(char), NULL);
189 }
190 
191 void bc_lex_free(BcLex *l) {
192 	BC_SIG_ASSERT_LOCKED;
193 	assert(l != NULL);
194 	bc_vec_free(&l->str);
195 }
196 
197 void bc_lex_file(BcLex *l, const char *file) {
198 	assert(l != NULL && file != NULL);
199 	l->line = 1;
200 	vm.file = file;
201 }
202 
203 void bc_lex_next(BcLex *l) {
204 
205 	assert(l != NULL);
206 
207 	l->last = l->t;
208 	l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
209 
210 	if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
211 
212 	l->t = BC_LEX_EOF;
213 
214 	if (l->i == l->len) return;
215 
216 	// Loop until failure or we don't have whitespace. This
217 	// is so the parser doesn't get inundated with whitespace.
218 	do {
219 		vm.next(l);
220 	} while (l->t == BC_LEX_WHITESPACE);
221 }
222 
223 void bc_lex_text(BcLex *l, const char *text) {
224 	assert(l != NULL && text != NULL);
225 	l->buf = text;
226 	l->i = 0;
227 	l->len = strlen(text);
228 	l->t = l->last = BC_LEX_INVALID;
229 	bc_lex_next(l);
230 }
231