xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_handle.c (revision 2bda830b1b393f809c54b105ec8ab418c3e505a1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "gnu_msgfmt.h"
30 
/*
 * Flags collected from "#," comments by handle_comment() and consumed by
 * handle_message() for the entry that follows; clear_state() resets both.
 */
31 static int	next_entry_is_fuzzy = 0;
32 static int	next_entry_is_c_format = 0;
/*
 * Catalog that handle_message() currently adds entries to, and its file
 * name.  Both are set by catalog_init(); cur_mo == NULL means that no
 * catalog has been selected yet for the current po file.
 */
33 static struct catalog	*cur_catalog = NULL;
34 static char	*cur_mo = NULL;
35 
/* Stream of the po file opened by po_init() and closed by po_fini(). */
36 FILE	*fp;
/*
 * Conversion descriptor towards DEST_CHARSET set up by conv_init();
 * (iconv_t)-1 means that no conversion is in effect.
 */
37 iconv_t	cd = (iconv_t)-1;
/* Head of the singly linked list of catalogs built by catalog_init(). */
38 struct catalog	*catalog_head = NULL;
/* Index into po_names of the po file being read; bumped by po_fini(). */
39 int	cur_po_index = 0;
40 
/*
 * Moves cursor past a run of spaces and tabs, never beyond limit.
 */
static char *
alias_skip_blanks(char *cursor, char *limit)
{
	while ((cursor < limit) && ((*cursor == ' ') || (*cursor == '\t')))
		cursor++;
	return (cursor);
}

/*
 * Moves cursor to the first space, tab or newline at or after it,
 * never beyond limit.
 */
static char *
alias_skip_word(char *cursor, char *limit)
{
	while ((cursor < limit) && (*cursor != ' ') &&
	    (*cursor != '\t') && (*cursor != '\n'))
		cursor++;
	return (cursor);
}

/*
 * Moves cursor just past the next newline, or to limit if the buffer
 * holds no further newline.
 */
static char *
alias_next_line(char *cursor, char *limit)
{
	while (cursor < limit) {
		if (*cursor++ == '\n')
			break;
	}
	return (cursor);
}

/*
 * Scans the alias table held in the size bytes starting at *paddr for a
 * line whose first word equals variant (compared case-insensitively)
 * and which carries a second word.
 *
 * Each line has the form "<alias> <canonical>", the words being
 * separated by spaces and/or tabs.  A line whose very first character
 * is '#' is a comment.  Lines with only one word are skipped.
 *
 * On success, *paddr is set to the start of the second word inside the
 * buffer (it is NOT NUL-terminated) and its length is returned.
 * If no usable line is found, 0 is returned and *paddr is left as is.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char	*cur = *paddr;
	char	*end = cur + size;
	char	*word;
	size_t	want = strlen(variant);
	size_t	word_len;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* first word: the alias */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		word = cur;
		cur = alias_skip_word(cur, end);
		if (cur >= end) {
			/* buffer ended before a second word could follow */
			break;
		}
		if (*cur == '\n') {
			/* single-word line: not a valid entry */
			cur++;
			continue;
		}

		word_len = (size_t)(cur - word);
		if ((word_len != want) ||
		    ((strncmp(word, variant, want) != 0) &&
		    (strncasecmp(word, variant, want) != 0))) {
			/* some other alias; try the next line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* second word: the canonical name */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		word = cur;
		cur = alias_skip_word(cur, end);
		word_len = (size_t)(cur - word);
		if (word_len == 0) {
			/* only blanks followed the alias */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = word;
		return (word_len);
	}
	return (0);
}
118 
119 /*
120  * Checks if the specified charset is equivalent to UTF-8.
121  * If it's equivalent to UTF-8, returns 1; Otherwise, returns 0.
122  */
123 static int
124 check_utf8(const char *charset)
125 {
126 	int	fd;
127 	struct stat64	statbuf;
128 	caddr_t	addr;
129 	size_t	buflen, charset_len, utf8_len;
130 	char	*c_charset, *c_utf8, *p;
131 
132 	if (strcmp(charset, DEST_CHARSET) == 0)
133 		return (1);
134 
135 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
136 	if (fd == -1) {
137 		/* no alias file found */
138 		return (0);
139 	}
140 	if (fstat64(fd, &statbuf) == -1) {
141 		(void) close(fd);
142 		return (0);
143 	}
144 	buflen = (size_t)statbuf.st_size;
145 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
146 	(void) close(fd);
147 	if (addr == MAP_FAILED) {
148 		warning("mmap() for %s failed.", _ENCODING_ALIAS_PATH);
149 		return (0);
150 	}
151 	p = (char *)addr;
152 	charset_len = search_alias(&p, buflen, charset);
153 	if (charset_len) {
154 		c_charset = alloca(charset_len + 1);
155 		(void) memcpy(c_charset, p, charset_len);
156 		c_charset[charset_len] = '\0';
157 	} else {
158 		c_charset = (char *)charset;
159 	}
160 	p = (char *)addr;
161 	utf8_len = search_alias(&p, buflen, DEST_CHARSET);
162 	if (utf8_len) {
163 		c_utf8 = alloca(utf8_len + 1);
164 		(void) memcpy(c_utf8, p, utf8_len);
165 		c_utf8[utf8_len] = '\0';
166 	} else {
167 		c_utf8 = DEST_CHARSET;
168 	}
169 	(void) munmap(addr, buflen);
170 	if (charset_len == 0 && utf8_len == 0) {
171 		/*
172 		 * Entry for neither charset nor utf8 found
173 		 */
174 		return (0);
175 	}
176 
177 	if (strcmp(c_charset, c_utf8) == 0)
178 		return (1);
179 	else
180 		return (0);
181 }
182 
183 static void
184 conv_init(const char *charset)
185 {
186 	if (charset == NULL) {
187 		/*
188 		 * No conversion
189 		 */
190 		cd = (iconv_t)-1;
191 		return;
192 	}
193 	if (check_utf8(charset)) {
194 		/*
195 		 * Charset is UTF-8.
196 		 * No conversion is required.
197 		 */
198 		cd = (iconv_t)-1;
199 		return;
200 	}
201 	cd = iconv_open(DEST_CHARSET, charset);
202 	if (cd == (iconv_t)-1) {
203 		/*
204 		 * No such a conversion
205 		 */
206 		warning(gettext(WARN_NOCONV),
207 			cur_line, cur_po, charset, DEST_CHARSET);
208 		return;
209 	}
210 }
211 
212 void
213 clear_state(void)
214 {
215 	next_entry_is_fuzzy = 0;
216 	next_entry_is_c_format = 0;
217 }
218 
219 void
220 handle_domain(char *domainname)
221 {
222 	if (outfile) {
223 		/*
224 		 * outfile has been specified by -o option
225 		 * ignore all domain directives
226 		 */
227 		if (verbose_flag) {
228 			diag(gettext(DIAG_IGNORE_DOMAIN),
229 				cur_line, cur_po, domainname);
230 		}
231 		free(domainname);
232 		return;
233 	}
234 
235 	if (strict_flag) {
236 		/*
237 		 * add ".mo" to the domain
238 		 */
239 		char	*tmp;
240 		tmp = Xrealloc(domainname, strlen(domainname) + 3 + 1);
241 		(void) strcat(tmp, ".mo");
242 		domainname = tmp;
243 	}
244 	catalog_init(domainname);
245 	free(domainname);
246 }
247 
248 void
249 catalog_init(const char *filename)
250 {
251 	struct catalog	*p;
252 
253 	if (!catalog_head) {
254 		p = Xcalloc(1, sizeof (struct catalog));
255 		p->fname = Xstrdup(filename);
256 		p->msg_size = DEF_MSG_NUM;
257 		p->nmsg = 0;
258 		p->msg = Xcalloc(p->msg_size, sizeof (struct messages));
259 		p->thash_size = find_prime(DEF_MSG_NUM);
260 		p->thash = Xcalloc(p->thash_size, sizeof (unsigned int));
261 		catalog_head = p;
262 	} else {
263 		p = catalog_head;
264 		for (; ; ) {
265 			struct catalog	*tmp;
266 			if (strcmp(p->fname, filename) == 0) {
267 				/* already registered */
268 				break;
269 			}
270 			if (p->next) {
271 				p = p->next;
272 				continue;
273 			}
274 			/*
275 			 * this domain hasn't been registered
276 			 */
277 			tmp = Xcalloc(1, sizeof (struct catalog));
278 			tmp->fname = Xstrdup(filename);
279 			tmp->msg_size = DEF_MSG_NUM;
280 			tmp->nmsg = 0;
281 			tmp->msg = Xcalloc(tmp->msg_size,
282 			    sizeof (struct messages));
283 			tmp->thash_size = find_prime(DEF_MSG_NUM);
284 			tmp->thash = Xcalloc(tmp->thash_size,
285 			    sizeof (unsigned int));
286 			p->next = tmp;
287 			p = tmp;
288 			break;
289 		}
290 	}
291 	cur_catalog = p;
292 	cur_mo = p->fname;
293 }
294 
295 
296 void
297 handle_comment(char *comment)
298 {
299 	char	*p;
300 
301 	p = comment;
302 
303 	if (*p != ',') {
304 		/*
305 		 * This comment is just informative only.
306 		 */
307 		free(comment);
308 		return;
309 	}
310 	/*
311 	 * Checks "fuzzy", "c-format", and "no-c-format"
312 	 */
313 	p++;
314 	if (strstr(p, "fuzzy") != NULL) {
315 		next_entry_is_fuzzy = 1;
316 	}
317 	if (strstr(p, "no-c-format") != NULL) {
318 		next_entry_is_c_format = 0;
319 	} else if (strstr(p, "c-format") != NULL) {
320 		next_entry_is_c_format = 1;
321 	}
322 
323 	free(comment);
324 }
325 
326 void
327 handle_message(struct entry *id, struct entry *str)
328 {
329 	char	*charset, *nplurals, *tmp, *p;
330 	struct messages	*msg, *dupmsg;
331 	size_t	len;
332 	unsigned int	hash_val;
333 	unsigned int	nmsg, n, thash_idx;
334 
335 	if (cur_mo == NULL) {
336 		/*
337 		 * output file hasn't been specified, nor
338 		 * no domain directive found
339 		 */
340 		char	*default_domain;
341 
342 		default_domain = strict_flag ? DEFAULT_DOMAIN_MO :
343 		    DEFAULT_DOMAIN;
344 		catalog_init(default_domain);
345 	}
346 
347 	/*
348 	 * cur_catalog should be valid, at this point
349 	 */
350 
351 	hash_val = hashpjw(id->str);
352 	dupmsg = search_msg(cur_catalog, id->str, hash_val);
353 
354 	if (dupmsg) {
355 		if ((dupmsg->str_len == str->len) &&
356 		    (memcmp(dupmsg->str, str->str, str->len) == 0)) {
357 			/* totally same entry */
358 			if (verbose_flag) {
359 				warning(gettext(WARN_DUP_ENTRIES),
360 				    dupmsg->num, po_names[dupmsg->po],
361 				    id->num, cur_po);
362 			}
363 			free(id->str);
364 			if (id->pos)
365 				free(id->pos);
366 			free(str->str);
367 			if (str->pos)
368 				free(str->pos);
369 			return;
370 		}
371 		/* duplicate msgid */
372 		if (verbose_flag) {
373 			diag(gettext(ERR_DUP_ENTRIES),
374 			    dupmsg->num, po_names[dupmsg->po],
375 			    id->num, cur_po);
376 			po_error++;
377 		}
378 		/* ignore this etnry */
379 		free(id->str);
380 		if (id->pos)
381 			free(id->pos);
382 		free(str->str);
383 		if (str->pos)
384 			free(str->pos);
385 		return;
386 	}
387 
388 	if (next_entry_is_fuzzy) {
389 		/* fuzzy entry */
390 		cur_catalog->fnum++;
391 		if (!fuzzy_flag) {
392 			/* ignore this entry */
393 			free(id->str);
394 			if (id->pos)
395 				free(id->pos);
396 			free(str->str);
397 			if (str->pos)
398 				free(str->pos);
399 			return;
400 		}
401 	}
402 
403 	if (str->len == str->no) {
404 		/* this entry is not translated */
405 		cur_catalog->unum++;
406 		free(id->str);
407 		if (id->pos)
408 			free(id->pos);
409 		free(str->str);
410 		if (str->pos)
411 			free(str->pos);
412 		return;
413 	}
414 
415 	/* Checks if this is the header entry */
416 	if ((id->no == 1) && (id->len == 1)) {
417 		/*
418 		 * Header entry
419 		 */
420 		cur_catalog->header++;
421 
422 		/*
423 		 * Need to extract the charset information
424 		 */
425 		charset = strstr(str->str, CHARSET_STR);
426 		if (charset == NULL) {
427 			/* no charset information */
428 			warning(gettext(WARN_NOCHARSET),
429 			    id->num, cur_po, str->num);
430 			conv_init(NULL);
431 		} else {
432 			charset += CHARSET_LEN;
433 			p = charset;
434 			while ((*p != ' ') && (*p != '\t') &&
435 			    (*p != '\n'))
436 				p++;
437 			len = p - charset;
438 			tmp = Xmalloc(len + 1);
439 			(void) memcpy(tmp, charset, len);
440 			*(tmp + len) = '\0';
441 			charset = tmp;
442 			conv_init(charset);
443 			free(charset);
444 		}
445 		nplurals = strstr(str->str, NPLURALS_STR);
446 		if (nplurals == NULL) {
447 			cur_catalog->nplurals = 0;
448 		} else {
449 			unsigned int	num;
450 			nplurals += NPLURALS_LEN;
451 			p = nplurals;
452 			num = 0;
453 			while (isdigit((unsigned char)*p)) {
454 				num = num * 10 + *p++ - '0';
455 			}
456 			cur_catalog->nplurals = num;
457 		}
458 	}
459 
460 	if (verbose_flag)
461 		check_format(id, str, next_entry_is_c_format);
462 
463 	if (id->pos)
464 		free(id->pos);
465 	if (str->pos)
466 		free(str->pos);
467 
468 	msg = cur_catalog->msg;
469 	nmsg = cur_catalog->nmsg;
470 
471 	msg[nmsg].po = cur_po_index;
472 	msg[nmsg].num = id->num;
473 	msg[nmsg].id = id->str;
474 	msg[nmsg].id_len = id->len;
475 	msg[nmsg].str = str->str;
476 	msg[nmsg].str_len = str->len;
477 	msg[nmsg].hash = hash_val;
478 
479 	thash_idx = get_hash_index(cur_catalog->thash,
480 	    hash_val, cur_catalog->thash_size);
481 	cur_catalog->thash[thash_idx] = nmsg + 1;
482 	cur_catalog->nmsg++;
483 
484 	if (cur_catalog->nmsg >= cur_catalog->msg_size) {
485 		/* no vacancy in message array */
486 		cur_catalog->msg_size += DEF_MSG_NUM;
487 		cur_catalog->msg = Xrealloc(cur_catalog->msg,
488 		    cur_catalog->msg_size * sizeof (struct messages));
489 
490 		cur_catalog->thash_size =
491 			find_prime(cur_catalog->msg_size);
492 		free(cur_catalog->thash);
493 		cur_catalog->thash = Xcalloc(cur_catalog->thash_size,
494 		    sizeof (unsigned int));
495 
496 		for (n = 0; n < cur_catalog->nmsg; n++) {
497 			thash_idx = get_hash_index(cur_catalog->thash,
498 			    cur_catalog->msg[n].hash,
499 			    cur_catalog->thash_size);
500 			cur_catalog->thash[thash_idx] = n + 1;
501 		}
502 	}
503 }
504 
505 void
506 po_init(const char *file)
507 {
508 	char	*filename;
509 
510 	if (!inputdir) {
511 		filename = Xstrdup(file);
512 	} else {
513 		size_t	dirlen, filelen, len;
514 
515 		dirlen = strlen(inputdir);
516 		filelen = strlen(file);
517 		len = dirlen + 1 + filelen + 1;
518 		filename = Xmalloc(len);
519 		(void) memcpy(filename, inputdir, dirlen);
520 		*(filename + dirlen) = '/';
521 		(void) memcpy(filename + dirlen + 1, file, filelen);
522 		*(filename + dirlen + 1 + filelen) = '\0';
523 	}
524 
525 	fp = fopen(filename, "r");
526 	if (fp == NULL) {
527 		error(gettext(ERR_OPEN_FAILED), filename);
528 		/* NOTREACHED */
529 	}
530 
531 	po_names[cur_po_index] = filename;
532 	cur_line = 1;
533 	cd = (iconv_t)-1;
534 	if (!outfile)
535 		cur_mo = NULL;
536 }
537 
538 void
539 po_fini(void)
540 {
541 	cur_po_index++;
542 	(void) fclose(fp);
543 	if (cd != (iconv_t)-1)
544 		(void) iconv_close(cd);
545 }
546