xref: /freebsd/contrib/bc/src/bc_lex.c (revision a0409676120c1e558d0ade943019934e0f15118d)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * The lexer for bc.
33  *
34  */
35 
36 #if BC_ENABLED
37 
38 #include <assert.h>
39 #include <ctype.h>
40 #include <string.h>
41 
42 #include <bc.h>
43 #include <vm.h>
44 
45 static void bc_lex_identifier(BcLex *l) {
46 
47 	size_t i;
48 	const char *buf = l->buf + l->i - 1;
49 
50 	for (i = 0; i < bc_lex_kws_len; ++i) {
51 
52 		const BcLexKeyword *kw = bc_lex_kws + i;
53 		size_t n = BC_LEX_KW_LEN(kw);
54 
55 		if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') {
56 
57 			l->t = BC_LEX_KW_AUTO + (BcLexType) i;
58 
59 			if (!BC_LEX_KW_POSIX(kw))
60 				bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);
61 
62 			// We minus 1 because the index has already been incremented.
63 			l->i += n - 1;
64 			return;
65 		}
66 	}
67 
68 	bc_lex_name(l);
69 
70 	if (BC_ERR(l->str.len - 1 > 1))
71 		bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
72 }
73 
74 static void bc_lex_string(BcLex *l) {
75 
76 	size_t len, nlines = 0, i = l->i;
77 	const char *buf = l->buf;
78 	char c;
79 
80 	l->t = BC_LEX_STR;
81 
82 	for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n';
83 
84 	if (BC_ERR(c == '\0')) {
85 		l->i = i;
86 		bc_lex_err(l, BC_ERR_PARSE_STRING);
87 	}
88 
89 	len = i - l->i;
90 	bc_vec_string(&l->str, len, l->buf + l->i);
91 
92 	l->i = i + 1;
93 	l->line += nlines;
94 }
95 
96 static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {
97 	if (l->buf[l->i] == '=') {
98 		l->i += 1;
99 		l->t = with;
100 	}
101 	else l->t = without;
102 }
103 
104 void bc_lex_token(BcLex *l) {
105 
106 	char c = l->buf[l->i++], c2;
107 
108 	// This is the workhorse of the lexer.
109 	switch (c) {
110 
111 		case '\0':
112 		case '\n':
113 		case '\t':
114 		case '\v':
115 		case '\f':
116 		case '\r':
117 		case ' ':
118 		{
119 			bc_lex_commonTokens(l, c);
120 			break;
121 		}
122 
123 		case '!':
124 		{
125 			bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
126 
127 			if (l->t == BC_LEX_OP_BOOL_NOT)
128 				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");
129 
130 			break;
131 		}
132 
133 		case '"':
134 		{
135 			bc_lex_string(l);
136 			break;
137 		}
138 
139 		case '#':
140 		{
141 			bc_lex_err(l, BC_ERR_POSIX_COMMENT);
142 			bc_lex_lineComment(l);
143 			break;
144 		}
145 
146 		case '%':
147 		{
148 			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
149 			break;
150 		}
151 
152 		case '&':
153 		{
154 			c2 = l->buf[l->i];
155 			if (BC_NO_ERR(c2 == '&')) {
156 
157 				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");
158 
159 				l->i += 1;
160 				l->t = BC_LEX_OP_BOOL_AND;
161 			}
162 			else bc_lex_invalidChar(l, c);
163 
164 			break;
165 		}
166 #if BC_ENABLE_EXTRA_MATH
167 		case '$':
168 		{
169 			l->t = BC_LEX_OP_TRUNC;
170 			break;
171 		}
172 
173 		case '@':
174 		{
175 			bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
176 			break;
177 		}
178 #endif // BC_ENABLE_EXTRA_MATH
179 		case '(':
180 		case ')':
181 		{
182 			l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
183 			break;
184 		}
185 
186 		case '*':
187 		{
188 			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
189 			break;
190 		}
191 
192 		case '+':
193 		{
194 			c2 = l->buf[l->i];
195 			if (c2 == '+') {
196 				l->i += 1;
197 				l->t = BC_LEX_OP_INC;
198 			}
199 			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
200 			break;
201 		}
202 
203 		case ',':
204 		{
205 			l->t = BC_LEX_COMMA;
206 			break;
207 		}
208 
209 		case '-':
210 		{
211 			c2 = l->buf[l->i];
212 			if (c2 == '-') {
213 				l->i += 1;
214 				l->t = BC_LEX_OP_DEC;
215 			}
216 			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
217 			break;
218 		}
219 
220 		case '.':
221 		{
222 			c2 = l->buf[l->i];
223 			if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
224 			else {
225 				l->t = BC_LEX_KW_LAST;
226 				bc_lex_err(l, BC_ERR_POSIX_DOT);
227 			}
228 			break;
229 		}
230 
231 		case '/':
232 		{
233 			c2 = l->buf[l->i];
234 			if (c2 =='*') bc_lex_comment(l);
235 			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
236 			break;
237 		}
238 
239 		case '0':
240 		case '1':
241 		case '2':
242 		case '3':
243 		case '4':
244 		case '5':
245 		case '6':
246 		case '7':
247 		case '8':
248 		case '9':
249 		case 'A':
250 		case 'B':
251 		case 'C':
252 		case 'D':
253 		case 'E':
254 		case 'F':
255 		// Apparently, GNU bc (and maybe others) allows any uppercase letter as
256 		// a number. When single digits, they act like the ones above. When
257 		// multi-digit, any letter above the input base is automatically set to
258 		// the biggest allowable digit in the input base.
259 		case 'G':
260 		case 'H':
261 		case 'I':
262 		case 'J':
263 		case 'K':
264 		case 'L':
265 		case 'M':
266 		case 'N':
267 		case 'O':
268 		case 'P':
269 		case 'Q':
270 		case 'R':
271 		case 'S':
272 		case 'T':
273 		case 'U':
274 		case 'V':
275 		case 'W':
276 		case 'X':
277 		case 'Y':
278 		case 'Z':
279 		{
280 			bc_lex_number(l, c);
281 			break;
282 		}
283 
284 		case ';':
285 		{
286 			l->t = BC_LEX_SCOLON;
287 			break;
288 		}
289 
290 		case '<':
291 		{
292 #if BC_ENABLE_EXTRA_MATH
293 			c2 = l->buf[l->i];
294 
295 			if (c2 == '<') {
296 				l->i += 1;
297 				bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
298 				break;
299 			}
300 #endif // BC_ENABLE_EXTRA_MATH
301 			bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
302 			break;
303 		}
304 
305 		case '=':
306 		{
307 			bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
308 			break;
309 		}
310 
311 		case '>':
312 		{
313 #if BC_ENABLE_EXTRA_MATH
314 			c2 = l->buf[l->i];
315 
316 			if (c2 == '>') {
317 				l->i += 1;
318 				bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
319 				break;
320 			}
321 #endif // BC_ENABLE_EXTRA_MATH
322 			bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
323 			break;
324 		}
325 
326 		case '[':
327 		case ']':
328 		{
329 			l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
330 			break;
331 		}
332 
333 		case '\\':
334 		{
335 			if (BC_NO_ERR(l->buf[l->i] == '\n')) {
336 				l->i += 1;
337 				l->t = BC_LEX_WHITESPACE;
338 			}
339 			else bc_lex_invalidChar(l, c);
340 			break;
341 		}
342 
343 		case '^':
344 		{
345 			bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
346 			break;
347 		}
348 
349 		case 'a':
350 		case 'b':
351 		case 'c':
352 		case 'd':
353 		case 'e':
354 		case 'f':
355 		case 'g':
356 		case 'h':
357 		case 'i':
358 		case 'j':
359 		case 'k':
360 		case 'l':
361 		case 'm':
362 		case 'n':
363 		case 'o':
364 		case 'p':
365 		case 'q':
366 		case 'r':
367 		case 's':
368 		case 't':
369 		case 'u':
370 		case 'v':
371 		case 'w':
372 		case 'x':
373 		case 'y':
374 		case 'z':
375 		{
376 			bc_lex_identifier(l);
377 			break;
378 		}
379 
380 		case '{':
381 		case '}':
382 		{
383 			l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
384 			break;
385 		}
386 
387 		case '|':
388 		{
389 			c2 = l->buf[l->i];
390 
391 			if (BC_NO_ERR(c2 == '|')) {
392 
393 				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");
394 
395 				l->i += 1;
396 				l->t = BC_LEX_OP_BOOL_OR;
397 			}
398 			else bc_lex_invalidChar(l, c);
399 
400 			break;
401 		}
402 
403 		default:
404 		{
405 			bc_lex_invalidChar(l, c);
406 		}
407 	}
408 }
409 #endif // BC_ENABLED
410