xref: /illumos-gate/usr/src/cmd/geniconvtbl/itm_comp.l (revision 18d738ddd2d0f4a4b4d5b1939e627aacd420b59d)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Copyright (c) 1999 by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <libintl.h>
32 #include <string.h>
33 
34 #include <regexpr.h>
35 
36 #include "iconv_tm.h"
37 #include "itmcomp.h"
38 #include "y.tab.h"
39 
40 static itm_data_t	*hexadecimal_data(int, char *);
41 static itm_data_t	*name_data(int, char *);
42 static void		filename_lineno(void);
43 static int		at_name_to_token(char *);
44 
45 
46 %}
47 
%start	norm comment

	/*
	 * Named patterns referenced by the rules section.
	 *
	 * DECIMAL	one or more decimal digits
	 * OCTAL	'0' followed by at least two octal digits
	 * HEXADECIMAL	"0x" or "0X" followed by one or more hex digits
	 * ITMNAME	two non-empty runs of characters other than '%',
	 *		blank, tab, newline and carriage return, joined by
	 *		a single '%'
	 * ATNAME	'@' followed by one or more letters, digits or '_'
	 * NAME		a letter or '_' followed by letters, digits or '_'
	 * MAPTYPE_NAME	one of the five listed map type keywords
	 *
	 * The start conditions declared above are "norm" and "comment";
	 * no rule in the rules section is qualified with either of them.
	 */
DECIMAL		([0-9]+)
OCTAL		(0[0-7][0-7]+)
HEXADECIMAL	(("0x"|"0X")([0-9A-Fa-f])+)
ITMNAME		(([^% \t\n\r])+"%"([^% \t\n\r])+)
ATNAME		"@"([0-9A-Za-z_]+)
NAME		([A-Za-z_][A-Za-z0-9_]*)
MAPTYPE_NAME	(automatic|dense|index|hash|binary)
57 
58 %%
59 
[ \t\n]+	;
"//".*"\n"	;

^"#"[ \t]*{DECIMAL}[ \t]*"\"".*"\"".*"\n" {
			/*
			 * Line marker of the form: # <number> "<file>" ...
			 * Take over the file name and line number it names.
			 */
			filename_lineno();
		}

^"#".*"\n"	{
			/*
			 * Any other line starting with '#' is dropped; a
			 * warning is issued when cmd_opt.preprocess is
			 * not set.
			 */
			if (NULL == cmd_opt.preprocess) {
				itm_error(
				gettext("warning: "
					"preprocess may be required\n"));
			}
		}

{DECIMAL}	{
			yylval.num = strtoul(yytext, (char **)NULL, 10);
			return (DECIMAL);
		}

{OCTAL}		{
			/*
			 * NOTE(review): {DECIMAL} is listed first and matches
			 * every string this pattern matches at the same or a
			 * greater length, so this rule looks unreachable and
			 * such literals are read as base 10 -- confirm
			 * whether that is intended before reordering.
			 */
			yylval.num = strtoul(yytext, (char **)NULL, 8);
			return (DECIMAL);
		}

{HEXADECIMAL}	{
			/* skip the two-character "0x"/"0X" prefix */
			yylval.data = hexadecimal_data(yyleng - 2, yytext + 2);
			return (HEXADECIMAL);
		}

{ITMNAME}	{
			yylval.data = str_to_data(yyleng, yytext);
			return (ITMNAME);
		}

{ATNAME}	{
			/* the token value comes from the keyword table */
			return at_name_to_token(yytext);
		}

{MAPTYPE_NAME}	{
			/*
			 * NOTE(review): if yylval is a union, the value
			 * stored in .num here is replaced by the .data
			 * store that follows -- confirm.
			 */
			yylval.num = at_name_to_token(yytext);
			yylval.data = name_data(yyleng, yytext);
			return (MAPTYPE_NAME);
		}

{NAME}		{
			/*
			 * A name found in the keyword table is returned as
			 * that keyword's token; anything else is a plain
			 * NAME carrying its text.
			 */
			yylval.num = at_name_to_token(yytext);
			if (0 != yylval.num) {
				return (yylval.num);
			} else {
				yylval.data = name_data(yyleng, yytext);
				return (NAME);
			}
		}


"{"		{return (CBO);}
"}"		{return (CBC);}
"["		{return (SBO);}
"]"		{return (SBC);}
"("		{return (PO);}
")"		{return (PC);}
";"		{return (SC);}
","		{return (COMMA);}
":"		{return (COLON);}
"..."		{return (ELLIPSES);}


"="		{return (ASSIGN);}
"||"		{return (LOR);}
"&&"		{return (LAND);}
"|"		{return (OR);}
"^"		{return (XOR);}
"&"		{return (AND);}
"=="		{return (EQ);}
"!="		{return (NE);}
"<"		{return (LT);}
"<="		{return (LE);}
">"		{return (GT);}
">="		{return (GE);}
"<<"		{return (SHL);}
">>"		{return (SHR);}
"+"		{return (PLUS);}
"-"		{return (MINUS);}
"*"		{return (MUL);}
"/"		{return (DIV);}
"%"		{return (MOD);}
"!"		{return (NOT);}
"~"		{return (NEG);}

.		{
			/*
			 * The message has exactly one conversion ('%1$c'),
			 * so the offending character must be the first and
			 * only argument after the format.
			 */
			itm_error(
				gettext("Unrecognized token '%1$c' \n"),
				yytext[0]);
			return (0);
		}
150 
151 %%
152 
153 /*
154  * lexinit - starts the Lexical Analyzer off in the right start condition
155  */
156 void
157 lexinit()
158 {
159 	BEGIN norm;
160 }
161 
/*
 * yywrap - end-of-input hook called by the generated scanner.
 * Always returns 1, meaning there is no further input to switch to.
 */
int
yywrap(void)
{
	return (1);
}
168 
169 void
170 yyerror(char *s)
171 {
172 	extern int	yylineno;
173 
174 	itm_error(
175 		gettext("%1$s: file(%2$s) line(%3$d) last token(%4$s)\n"),
176 		s, itm_input_file, yylineno, yytext);
177 
178 	exit(ITMC_STATUS_BT);
179 }
180 
181 typedef struct {
182 	char	*name;
183 	int	token;
184 } at_name_token_t;
185 
186 /*
187  * NOT: This table must be sorted alphabetically.
188  */
189 static at_name_token_t at_table[] = {
190 	"@automatic",	MAPTYPE_AUTO,
191 	"@binary",	MAPTYPE_BINARY,
192 	"@between",	BETWEEN,
193 	"@condition",	CONDITION,
194 	"@default",	ITM_DEFAULT,
195 	"@dense",	MAPTYPE_DENSE,
196 	"@direction",	DIRECTION,
197 	"@discard",	DISCARD,
198 	"@else",	ITM_ELSE,
199 	"@error",	ERROR,
200 	"@escapeseq",	ESCAPESEQ,
201 	"@false",	ITM_FALSE,
202 	"@hash",	MAPTYPE_HASH,
203 	"@identical",	ITM_IDENTICAL,
204 	"@if",		ITM_IF,
205 	"@in",		ITM_IN,
206 	"@index",	MAPTYPE_INDEX,
207 	"@init",	ITM_INIT,
208 	"@input",	ITM_IN,
209 	"@inputsize",	ITM_INSIZE,
210 	"@map",		MAP,
211 	"@maptype",	MAPTYPE,
212 	"@no_change_copy",	ITM_IDENTICAL,
213 	"@nop",		NOP,
214 	"@operation",	OPERATION,
215 	"@out",		ITM_OUT,
216 	"@output",	ITM_OUT,
217 	"@output_byte_length",	RESULTLEN,
218 	"@outputsize",	ITM_OUTSIZE,
219 	"@printchr",	PRINTCHR,
220 	"@printhd",	PRINTHD,
221 	"@printint",	PRINTINT,
222 	"@reset",	RESET,
223 	"@resultlen",	RESULTLEN,
224 	"@return",	RETURN,
225 	"@true",	ITM_TRUE,
226 	"automatic",	MAPTYPE_AUTO,
227 	"between",	BETWEEN,
228 	"binary",	MAPTYPE_BINARY,
229 	"break",	BREAK,
230 	"condition",	CONDITION,
231 	"default",	ITM_DEFAULT,
232 	"dense",	MAPTYPE_DENSE,
233 	"direction",	DIRECTION,
234 	"discard",	DISCARD,
235 	"else",		ITM_ELSE,
236 	"error",	ERROR,
237 	"escapeseq",	ESCAPESEQ,
238 	"false",	ITM_FALSE,
239 	"hash",		MAPTYPE_HASH,
240 	"if",		ITM_IF,
241 	"index",	MAPTYPE_INDEX,
242 	"init",		ITM_INIT,
243 	"input",	ITM_IN,
244 	"inputsize",	ITM_INSIZE,
245 	"map",		MAP,
246 	"maptype",	MAPTYPE,
247 	"no_change_copy",	ITM_IDENTICAL,
248 	"nop",		NOP,
249 	"operation",	OPERATION,
250 	"output",	ITM_OUT,
251 	"output_byte_length",	RESULTLEN,
252 	"outputsize",	ITM_OUTSIZE,
253 	"printchr",	PRINTCHR,
254 	"printhd",	PRINTHD,
255 	"printint",	PRINTINT,
256 	"reset",	RESET,
257 	"return",	RETURN,
258 	"true",		ITM_TRUE,
259 };
260 
261 int
262 at_name_to_token(char *s)
263 {
264 	int	high;
265 	int	mid;
266 	int	low;
267 	int	result;
268 
269 	TRACE_MESSAGE('l', ("at_name_to_token: %s", s));
270 	for (low = 0, high = (sizeof (at_table) /
271 				sizeof (at_name_token_t));
272 	    low < high; /* NOP */) {
273 		mid = (low + high) / 2;
274 		result = strcmp(s, at_table[mid].name);
275 		if (result < 0) {
276 			high = mid;
277 		} else if (0 < result) {
278 			low = mid + 1;
279 		} else { /* 0 == result */
280 			TRACE_MESSAGE('l', (" %d\n", at_table[mid].token));
281 			return (at_table[mid].token);
282 		}
283 	}
284 	TRACE_MESSAGE('l', (" - not found\n"));
285 	return (0);
286 }
287 
288 static itm_data_t *
289 hexadecimal_data(int seqsize, char *seq)
290 {
291 	itm_data_t	*data;
292 	char		*binary;
293 	int		i, j;
294 	int		val;
295 	int		high;
296 	int		low;
297 	int		size;
298 
299 	/* size is assured to be multiple of 2 */
300 	assert(seqsize != 0);
301 	size = seqsize + 1;
302 	size /= 2;
303 	if (size > MAXSEQUENCE) {
304 		itm_error(
305 		gettext(" specified sequence must be less than %$1d\n"),
306 		MAXSEQUENCE);
307 		return (NULL);
308 	}
309 	binary = malloc_vital(size);
310 	i = j = 0;
311 	if (seqsize % 2 != 0) {
312 		low =  *(seq);
313 		if (('0' <= low) && (low <= '9')) {
314 			val = (low - '0');
315 		} else if (('a' <= low) && (low <= 'f')) {
316 			val = (low - 'a' + 10);
317 		} else if (('A' <= low) && (low <= 'F')) {
318 			val = (low - 'A' + 10);
319 		}
320 		*(binary + i) = val;
321 		i++;
322 		j++;
323 	}
324 	for (/* NOP */; i < size; i++, j += 2) {
325 		high = *(seq + j);
326 		low =  *(seq + j + 1);
327 		if (('0' <= high) && (high <= '9')) {
328 			val = ((high - '0') << 4);
329 		} else if (('a' <= high) && (high <= 'f')) {
330 			val = ((high - 'a' + 10) << 4);
331 		} else if (('A' <= high) && (high <= 'F')) {
332 			val = ((high - 'A' + 10) << 4);
333 		}
334 		if (('0' <= low) && (low <= '9')) {
335 			val |= (low - '0');
336 		} else if (('a' <= low) && (low <= 'f')) {
337 			val |= (low - 'a' + 10);
338 		} else if (('A' <= low) && (low <= 'F')) {
339 			val |= (low - 'A' + 10);
340 		}
341 		*(binary + i) = val;
342 	}
343 
344 	data = malloc_vital(sizeof (itm_data_t));
345 
346 	data->size = size;
347 	if (size <= sizeof (data->place)) {
348 		(void) memmove(&(data->place), binary, size);
349 		free(binary);
350 	} else {
351 		data->place.itm_ptr = (itm_place2_t)binary;
352 	}
353 
354 	return (data);
355 }
356 
357 static itm_data_t *
358 name_data(int size, char *seq)
359 {
360 	itm_data_t *data;
361 
362 
363 	if (size > MAXNAMELENGTH) {
364 		itm_error(gettext("the length(%d) exceed limitation(%d)"),
365 		size, MAXNAMELENGTH);
366 		exit(ITMC_STATUS_BT2);
367 	}
368 	data = malloc_vital(sizeof (itm_data_t));
369 
370 	data->size = size;
371 	if (size <= sizeof (data->place)) {
372 		(void) memmove(&(data->place), seq, size);
373 	} else {
374 		data->place.itm_ptr = (itm_place2_t)malloc_vital(size);
375 		(void) memmove((char *)(data->place.itm_ptr), seq, size);
376 	}
377 
378 	return (data);
379 }
380 
381 static void
382 filename_lineno(void)
383 {
384 	static char	*re;
385 	static char	restr[] =
386 			"^#[ \t]*\\([0-9]\\{1,\\}\\)[ \t]*\"\\(.*\\)\".*";
387 	int		match;
388 	extern char	*braslist[];
389 	extern char	*braelist[];
390 	static char	*filename;
391 	int		len;
392 	int		lineno;
393 
394 	if (NULL == re) {
395 		re = compile(restr, NULL, NULL);
396 		if (NULL == re) {
397 			itm_error(
398 				gettext("REGEXP compile error\n"));
399 			exit(ITMC_STATUS_BT);
400 		}
401 	}
402 	match = step(yytext, re);
403 	if (0 != match) {
404 		lineno = atoi(braslist[0]);
405 		free(filename);
406 		len = braelist[1] - braslist[1];
407 		filename = malloc_vital(len + 1);
408 		(void) memcpy(filename, braslist[1], len);
409 		*(filename + len) = '\0';
410 		itm_input_file = filename;
411 		yylineno = lineno;
412 	}
413 }
414