xref: /illumos-gate/usr/src/cmd/geniconvtbl/itm_comp.l (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Copyright (c) 1999 by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <libintl.h>
34 #include <string.h>
35 
36 #include <regexpr.h>
37 
38 #include "iconv_tm.h"
39 #include "itmcomp.h"
40 #include "y.tab.h"
41 
42 static itm_data_t	*hexadecimal_data(int, char *);
43 static itm_data_t	*name_data(int, char *);
44 static void		filename_lineno(void);
45 static int		at_name_to_token(char *);
46 
47 
48 %}
49 
%start	norm comment

	/*
	 * The line above declares two start conditions; lexinit() selects
	 * norm, and no rule in this file is restricted to either of them.
	 *
	 * The names below are the patterns the rules section refers to:
	 *   DECIMAL       one or more decimal digits
	 *   OCTAL         a zero followed by two or more octal digits
	 *   HEXADECIMAL   0x or 0X followed by one or more hex digits
	 *   ITMNAME       two runs of characters joined by a single percent
	 *                 sign; neither run may hold a percent sign, space,
	 *                 tab, newline or carriage return
	 *   ATNAME        an at sign followed by letters, digits, underscores
	 *   NAME          an identifier that does not start with a digit
	 *   MAPTYPE_NAME  one of the five map type words
	 */
DECIMAL		([0-9]+)
OCTAL		(0[0-7][0-7]+)
HEXADECIMAL	(0[xX][0-9A-Fa-f]+)
ITMNAME		([^% \t\n\r]+"%"[^% \t\n\r]+)
ATNAME		("@"[0-9A-Za-z_]+)
NAME		([A-Za-z_][A-Za-z0-9_]*)
MAPTYPE_NAME	(automatic|dense|index|hash|binary)
59 
60 %%
61 
[ \t\n]+	{ /* white space only separates tokens */ }
"//".*"\n"	{ /* a comment runs from the two slashes to the end of the line */ }

^"#"[ \t]*{DECIMAL}[ \t]*"\"".*"\"".*"\n" {
			/*
			 * A line made of a hash sign, a number and a quoted
			 * string: filename_lineno() takes the file name and
			 * the line number from it.
			 */
			filename_lineno();
		}

^"#".*"\n"	{
			/*
			 * Any other line starting with a hash sign is dropped;
			 * a warning is printed when cmd_opt.preprocess is NULL.
			 */
			if (cmd_opt.preprocess == NULL) {
				itm_error(gettext("warning: "
				    "preprocess may be required\n"));
			}
		}
76 
{DECIMAL}	{
			/* base 10 literal, handed over as a number */
			yylval.num = strtoul(yytext, NULL, 10);
			return (DECIMAL);
		}

{OCTAL}		{
			/*
			 * Base 8 literal, handed over as a number under the
			 * DECIMAL token.
			 * NOTE(review): the DECIMAL rule is listed first and
			 * matches every text this rule matches at the same
			 * length, so this action looks unreachable -- confirm
			 * before relying on octal literals.
			 */
			yylval.num = strtoul(yytext, NULL, 8);
			return (DECIMAL);
		}

{HEXADECIMAL}	{
			/*
			 * Only the digits after the two character prefix are
			 * converted; the result is a byte sequence.
			 */
			yylval.data = hexadecimal_data(yyleng - 2, yytext + 2);
			return (HEXADECIMAL);
		}
89 
{ITMNAME}	{
			/* the matched text itself becomes the token value */
			yylval.data = str_to_data(yyleng, yytext);
			return (ITMNAME);
		}

{ATNAME}	{
			/* the table lookup result is returned as the token */
			return (at_name_to_token(yytext));
		}

{MAPTYPE_NAME}	{
			/*
			 * The lookup result is stored first, then the spelling
			 * of the word; the token is always MAPTYPE_NAME.
			 */
			yylval.num = at_name_to_token(yytext);
			yylval.data = name_data(yyleng, yytext);
			return (MAPTYPE_NAME);
		}

{NAME}		{
			/*
			 * A word found in the keyword table is returned as that
			 * keyword token; any other word is a NAME carrying its
			 * own spelling.
			 */
			yylval.num = at_name_to_token(yytext);
			if (yylval.num == 0) {
				yylval.data = name_data(yyleng, yytext);
				return (NAME);
			}
			return (yylval.num);
		}
111 
112 
"{"		{ /* brackets and separators */ return (CBO); }
"}"		{ return (CBC); }
"["		{ return (SBO); }
"]"		{ return (SBC); }
"("		{ return (PO); }
")"		{ return (PC); }
";"		{ return (SC); }
","		{ return (COMMA); }
":"		{ return (COLON); }
"..."		{ return (ELLIPSES); }


"="		{ /* assignment, logical, bitwise, relational, arithmetic */ return (ASSIGN); }
"||"		{ return (LOR); }
"&&"		{ return (LAND); }
"|"		{ return (OR); }
"^"		{ return (XOR); }
"&"		{ return (AND); }
"=="		{ return (EQ); }
"!="		{ return (NE); }
"<"		{ return (LT); }
"<="		{ return (LE); }
">"		{ return (GT); }
">="		{ return (GE); }
"<<"		{ return (SHL); }
">>"		{ return (SHR); }
"+"		{ return (PLUS); }
"-"		{ return (MINUS); }
"*"		{ return (MUL); }
"/"		{ return (DIV); }
"%"		{ return (MOD); }
"!"		{ return (NOT); }
"~"		{ return (NEG); }
146 
.		{
			/*
			 * Any character no other rule accepts.  The format
			 * holds exactly one conversion, for the character, so
			 * the character is the only argument passed after it.
			 */
			itm_error(
				gettext("Unrecognized token '%1$c' \n"),
				yytext[0]);
			return (0);
		}
152 
153 %%
154 
155 /*
156  * lexinit - starts the Lexical Analyzer off in the right start condition
157  */
158 void
159 lexinit()
160 {
161 	BEGIN norm;
162 }
163 
/*
 * yywrap - always returns 1.
 *
 * NOTE(review): lex conventionally calls yywrap() at end of input and stops
 * scanning on a non-zero return; whether this definition is needed at all
 * (rather than one supplied by the lex library) is an open question left by
 * the original author -- confirm.
 */
int
yywrap()
{
	const int	no_more_input = 1;

	return (no_more_input);
}
170 
171 void
172 yyerror(char *s)
173 {
174 	extern int	yylineno;
175 
176 	itm_error(
177 		gettext("%1$s: file(%2$s) line(%3$d) last token(%4$s)\n"),
178 		s, itm_input_file, yylineno, yytext);
179 
180 	exit(ITMC_STATUS_BT);
181 }
182 
/*
 * One keyword table entry: the spelling of a keyword and the parser token
 * it stands for.  The spelling is only ever read, so it is const.
 */
typedef struct {
	const char	*name;	/* keyword text, NUL-terminated */
	int		token;	/* token value returned for that text */
} at_name_token_t;
187 
188 /*
189  * NOT: This table must be sorted alphabetically.
190  */
191 static at_name_token_t at_table[] = {
192 	"@automatic",	MAPTYPE_AUTO,
193 	"@binary",	MAPTYPE_BINARY,
194 	"@between",	BETWEEN,
195 	"@condition",	CONDITION,
196 	"@default",	ITM_DEFAULT,
197 	"@dense",	MAPTYPE_DENSE,
198 	"@direction",	DIRECTION,
199 	"@discard",	DISCARD,
200 	"@else",	ITM_ELSE,
201 	"@error",	ERROR,
202 	"@escapeseq",	ESCAPESEQ,
203 	"@false",	ITM_FALSE,
204 	"@hash",	MAPTYPE_HASH,
205 	"@identical",	ITM_IDENTICAL,
206 	"@if",		ITM_IF,
207 	"@in",		ITM_IN,
208 	"@index",	MAPTYPE_INDEX,
209 	"@init",	ITM_INIT,
210 	"@input",	ITM_IN,
211 	"@inputsize",	ITM_INSIZE,
212 	"@map",		MAP,
213 	"@maptype",	MAPTYPE,
214 	"@no_change_copy",	ITM_IDENTICAL,
215 	"@nop",		NOP,
216 	"@operation",	OPERATION,
217 	"@out",		ITM_OUT,
218 	"@output",	ITM_OUT,
219 	"@output_byte_length",	RESULTLEN,
220 	"@outputsize",	ITM_OUTSIZE,
221 	"@printchr",	PRINTCHR,
222 	"@printhd",	PRINTHD,
223 	"@printint",	PRINTINT,
224 	"@reset",	RESET,
225 	"@resultlen",	RESULTLEN,
226 	"@return",	RETURN,
227 	"@true",	ITM_TRUE,
228 	"automatic",	MAPTYPE_AUTO,
229 	"between",	BETWEEN,
230 	"binary",	MAPTYPE_BINARY,
231 	"break",	BREAK,
232 	"condition",	CONDITION,
233 	"default",	ITM_DEFAULT,
234 	"dense",	MAPTYPE_DENSE,
235 	"direction",	DIRECTION,
236 	"discard",	DISCARD,
237 	"else",		ITM_ELSE,
238 	"error",	ERROR,
239 	"escapeseq",	ESCAPESEQ,
240 	"false",	ITM_FALSE,
241 	"hash",		MAPTYPE_HASH,
242 	"if",		ITM_IF,
243 	"index",	MAPTYPE_INDEX,
244 	"init",		ITM_INIT,
245 	"input",	ITM_IN,
246 	"inputsize",	ITM_INSIZE,
247 	"map",		MAP,
248 	"maptype",	MAPTYPE,
249 	"no_change_copy",	ITM_IDENTICAL,
250 	"nop",		NOP,
251 	"operation",	OPERATION,
252 	"output",	ITM_OUT,
253 	"output_byte_length",	RESULTLEN,
254 	"outputsize",	ITM_OUTSIZE,
255 	"printchr",	PRINTCHR,
256 	"printhd",	PRINTHD,
257 	"printint",	PRINTINT,
258 	"reset",	RESET,
259 	"return",	RETURN,
260 	"true",		ITM_TRUE,
261 };
262 
263 int
264 at_name_to_token(char *s)
265 {
266 	int	high;
267 	int	mid;
268 	int	low;
269 	int	result;
270 
271 	TRACE_MESSAGE('l', ("at_name_to_token: %s", s));
272 	for (low = 0, high = (sizeof (at_table) /
273 				sizeof (at_name_token_t));
274 	    low < high; /* NOP */) {
275 		mid = (low + high) / 2;
276 		result = strcmp(s, at_table[mid].name);
277 		if (result < 0) {
278 			high = mid;
279 		} else if (0 < result) {
280 			low = mid + 1;
281 		} else { /* 0 == result */
282 			TRACE_MESSAGE('l', (" %d\n", at_table[mid].token));
283 			return (at_table[mid].token);
284 		}
285 	}
286 	TRACE_MESSAGE('l', (" - not found\n"));
287 	return (0);
288 }
289 
290 static itm_data_t *
291 hexadecimal_data(int seqsize, char *seq)
292 {
293 	itm_data_t	*data;
294 	char		*binary;
295 	int		i, j;
296 	int		val;
297 	int		high;
298 	int		low;
299 	int		size;
300 
301 	/* size is assured to be multiple of 2 */
302 	assert(seqsize != 0);
303 	size = seqsize + 1;
304 	size /= 2;
305 	if (size > MAXSEQUENCE) {
306 		itm_error(
307 		gettext(" specified sequence must be less than %$1d\n"),
308 		MAXSEQUENCE);
309 		return (NULL);
310 	}
311 	binary = malloc_vital(size);
312 	i = j = 0;
313 	if (seqsize % 2 != 0) {
314 		low =  *(seq);
315 		if (('0' <= low) && (low <= '9')) {
316 			val = (low - '0');
317 		} else if (('a' <= low) && (low <= 'f')) {
318 			val = (low - 'a' + 10);
319 		} else if (('A' <= low) && (low <= 'F')) {
320 			val = (low - 'A' + 10);
321 		}
322 		*(binary + i) = val;
323 		i++;
324 		j++;
325 	}
326 	for (/* NOP */; i < size; i++, j += 2) {
327 		high = *(seq + j);
328 		low =  *(seq + j + 1);
329 		if (('0' <= high) && (high <= '9')) {
330 			val = ((high - '0') << 4);
331 		} else if (('a' <= high) && (high <= 'f')) {
332 			val = ((high - 'a' + 10) << 4);
333 		} else if (('A' <= high) && (high <= 'F')) {
334 			val = ((high - 'A' + 10) << 4);
335 		}
336 		if (('0' <= low) && (low <= '9')) {
337 			val |= (low - '0');
338 		} else if (('a' <= low) && (low <= 'f')) {
339 			val |= (low - 'a' + 10);
340 		} else if (('A' <= low) && (low <= 'F')) {
341 			val |= (low - 'A' + 10);
342 		}
343 		*(binary + i) = val;
344 	}
345 
346 	data = malloc_vital(sizeof (itm_data_t));
347 
348 	data->size = size;
349 	if (size <= sizeof (data->place)) {
350 		(void) memmove(&(data->place), binary, size);
351 		free(binary);
352 	} else {
353 		data->place.itm_ptr = (itm_place2_t)binary;
354 	}
355 
356 	return (data);
357 }
358 
359 static itm_data_t *
360 name_data(int size, char *seq)
361 {
362 	itm_data_t *data;
363 
364 
365 	if (size > MAXNAMELENGTH) {
366 		itm_error(gettext("the length(%d) exceed limitation(%d)"),
367 		size, MAXNAMELENGTH);
368 		exit(ITMC_STATUS_BT2);
369 	}
370 	data = malloc_vital(sizeof (itm_data_t));
371 
372 	data->size = size;
373 	if (size <= sizeof (data->place)) {
374 		(void) memmove(&(data->place), seq, size);
375 	} else {
376 		data->place.itm_ptr = (itm_place2_t)malloc_vital(size);
377 		(void) memmove((char *)(data->place.itm_ptr), seq, size);
378 	}
379 
380 	return (data);
381 }
382 
383 static void
384 filename_lineno(void)
385 {
386 	static char	*re;
387 	static char	restr[] =
388 			"^#[ \t]*\\([0-9]\\{1,\\}\\)[ \t]*\"\\(.*\\)\".*";
389 	int		match;
390 	extern char	*braslist[];
391 	extern char	*braelist[];
392 	static char	*filename;
393 	int		len;
394 	int		lineno;
395 
396 	if (NULL == re) {
397 		re = compile(restr, NULL, NULL);
398 		if (NULL == re) {
399 			itm_error(
400 				gettext("REGEXP compile error\n"));
401 			exit(ITMC_STATUS_BT);
402 		}
403 	}
404 	match = step(yytext, re);
405 	if (0 != match) {
406 		lineno = atoi(braslist[0]);
407 		free(filename);
408 		len = braelist[1] - braslist[1];
409 		filename = malloc_vital(len + 1);
410 		(void) memcpy(filename, braslist[1], len);
411 		*(filename + len) = '\0';
412 		itm_input_file = filename;
413 		yylineno = lineno;
414 	}
415 }
416