xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_handle.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "gnu_msgfmt.h"
29 
/*
 * State shared with the rest of msgfmt.
 *
 * fp		- stream of the PO file being read; opened by po_init()
 *		  and closed by po_fini().
 * cd		- iconv descriptor chosen by conv_init();
 *		  (iconv_t)-1 means that no conversion is performed.
 * catalog_head	- head of the list of catalogs built by catalog_init().
 * cur_po_index	- index of the current PO file in po_names[];
 *		  advanced by po_fini().
 */
FILE	*fp;
iconv_t	cd = (iconv_t)-1;
struct catalog	*catalog_head = NULL;
int	cur_po_index = 0;

/*
 * State private to this file.  Objects with static storage duration
 * start out as zero/NULL, so no explicit initializers are needed.
 *
 * cur_catalog / cur_mo		- catalog currently receiving messages and
 *				  its file name; set by catalog_init().
 * next_entry_is_fuzzy,
 * next_entry_is_c_format	- flags set by handle_comment() and reset
 *				  by clear_state().
 */
static struct catalog	*cur_catalog;
static char	*cur_mo;
static int	next_entry_is_fuzzy;
static int	next_entry_is_c_format;
39 
/*
 * Scans an alias table held in the buffer [*paddr, *paddr + size) for an
 * entry whose first field equals 'variant' (compared ignoring case).
 *
 * Each line of the table has the form
 *	<name> <blanks> <value> ...
 * where blanks are spaces or tabs.  A line whose first character is '#'
 * is a comment.  Lines consisting of a name only are skipped.
 *
 * On success, *paddr is set to the first character of the value field of
 * the first matching line and the length of that field is returned; the
 * field is NOT null-terminated.  If nothing matches, 0 is returned and
 * *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	const size_t	want = strlen(variant);
	char	*cur = *paddr;
	char	*const end = cur + size;
	char	*tok, *nl;

	while (cur < end) {
		if (*cur != '#') {
			/* leading blanks in front of the name */
			while ((cur < end) &&
			    ((*cur == ' ') || (*cur == '\t')))
				cur++;
			if (cur >= end)
				break;

			/* the name field */
			tok = cur;
			while ((cur < end) && (*cur != ' ') &&
			    (*cur != '\t') && (*cur != '\n'))
				cur++;
			if (cur >= end) {
				/* name runs into the end of the buffer */
				break;
			}
			if (*cur == '\n') {
				/* name without a value (or an empty line) */
				cur++;
				continue;
			}

			if (((size_t)(cur - tok) == want) &&
			    (strncasecmp(tok, variant, want) == 0)) {
				/* name matched; locate the value field */
				while ((cur < end) &&
				    ((*cur == ' ') || (*cur == '\t')))
					cur++;
				if (cur >= end)
					break;
				tok = cur;
				while ((cur < end) && (*cur != ' ') &&
				    (*cur != '\t') && (*cur != '\n'))
					cur++;
				if (cur != tok) {
					*paddr = tok;
					return ((size_t)(cur - tok));
				}
				/* only blanks followed the name; keep looking */
			}
		}

		/* discard whatever is left of this line */
		nl = memchr(cur, '\n', (size_t)(end - cur));
		cur = (nl != NULL) ? nl + 1 : end;
	}
	return (0);
}
117 
118 /*
119  * Checks if the specified charset is equivalent to UTF-8.
120  * If it's equivalent to UTF-8, returns 1; Otherwise, returns 0.
121  */
122 static int
123 check_utf8(const char *charset)
124 {
125 	int	fd;
126 	struct stat64	statbuf;
127 	caddr_t	addr;
128 	size_t	buflen, charset_len, utf8_len;
129 	char	*c_charset, *c_utf8, *p;
130 
131 	if (strcmp(charset, DEST_CHARSET) == 0)
132 		return (1);
133 
134 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
135 	if (fd == -1) {
136 		/* no alias file found */
137 		return (0);
138 	}
139 	if (fstat64(fd, &statbuf) == -1) {
140 		(void) close(fd);
141 		return (0);
142 	}
143 	buflen = (size_t)statbuf.st_size;
144 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
145 	(void) close(fd);
146 	if (addr == MAP_FAILED) {
147 		warning("mmap() for %s failed.", _ENCODING_ALIAS_PATH);
148 		return (0);
149 	}
150 	p = (char *)addr;
151 	charset_len = search_alias(&p, buflen, charset);
152 	if (charset_len) {
153 		c_charset = alloca(charset_len + 1);
154 		(void) memcpy(c_charset, p, charset_len);
155 		c_charset[charset_len] = '\0';
156 	} else {
157 		c_charset = (char *)charset;
158 	}
159 	p = (char *)addr;
160 	utf8_len = search_alias(&p, buflen, DEST_CHARSET);
161 	if (utf8_len) {
162 		c_utf8 = alloca(utf8_len + 1);
163 		(void) memcpy(c_utf8, p, utf8_len);
164 		c_utf8[utf8_len] = '\0';
165 	} else {
166 		c_utf8 = DEST_CHARSET;
167 	}
168 	(void) munmap(addr, buflen);
169 	if (charset_len == 0 && utf8_len == 0) {
170 		/*
171 		 * Entry for neither charset nor utf8 found
172 		 */
173 		return (0);
174 	}
175 
176 	if (strcmp(c_charset, c_utf8) == 0)
177 		return (1);
178 	else
179 		return (0);
180 }
181 
182 static void
183 conv_init(const char *charset)
184 {
185 	if (charset == NULL) {
186 		/*
187 		 * No conversion
188 		 */
189 		cd = (iconv_t)-1;
190 		return;
191 	}
192 	if (check_utf8(charset)) {
193 		/*
194 		 * Charset is UTF-8.
195 		 * No conversion is required.
196 		 */
197 		cd = (iconv_t)-1;
198 		return;
199 	}
200 	cd = iconv_open(DEST_CHARSET, charset);
201 	if (cd == (iconv_t)-1) {
202 		/*
203 		 * No such a conversion
204 		 */
205 		warning(gettext(WARN_NOCONV),
206 			cur_line, cur_po, charset, DEST_CHARSET);
207 		return;
208 	}
209 }
210 
211 void
212 clear_state(void)
213 {
214 	next_entry_is_fuzzy = 0;
215 	next_entry_is_c_format = 0;
216 }
217 
218 void
219 handle_domain(char *domainname)
220 {
221 	if (outfile) {
222 		/*
223 		 * outfile has been specified by -o option
224 		 * ignore all domain directives
225 		 */
226 		if (verbose_flag) {
227 			diag(gettext(DIAG_IGNORE_DOMAIN),
228 				cur_line, cur_po, domainname);
229 		}
230 		free(domainname);
231 		return;
232 	}
233 
234 	if (strict_flag) {
235 		/*
236 		 * add ".mo" to the domain
237 		 */
238 		char	*tmp;
239 		tmp = Xrealloc(domainname, strlen(domainname) + 3 + 1);
240 		(void) strcat(tmp, ".mo");
241 		domainname = tmp;
242 	}
243 	catalog_init(domainname);
244 	free(domainname);
245 }
246 
247 void
248 catalog_init(const char *filename)
249 {
250 	struct catalog	*p;
251 
252 	if (!catalog_head) {
253 		p = Xcalloc(1, sizeof (struct catalog));
254 		p->fname = Xstrdup(filename);
255 		p->msg_size = DEF_MSG_NUM;
256 		p->nmsg = 0;
257 		p->msg = Xcalloc(p->msg_size, sizeof (struct messages));
258 		p->thash_size = find_prime(DEF_MSG_NUM);
259 		p->thash = Xcalloc(p->thash_size, sizeof (unsigned int));
260 		catalog_head = p;
261 	} else {
262 		p = catalog_head;
263 		for (; ; ) {
264 			struct catalog	*tmp;
265 			if (strcmp(p->fname, filename) == 0) {
266 				/* already registered */
267 				break;
268 			}
269 			if (p->next) {
270 				p = p->next;
271 				continue;
272 			}
273 			/*
274 			 * this domain hasn't been registered
275 			 */
276 			tmp = Xcalloc(1, sizeof (struct catalog));
277 			tmp->fname = Xstrdup(filename);
278 			tmp->msg_size = DEF_MSG_NUM;
279 			tmp->nmsg = 0;
280 			tmp->msg = Xcalloc(tmp->msg_size,
281 			    sizeof (struct messages));
282 			tmp->thash_size = find_prime(DEF_MSG_NUM);
283 			tmp->thash = Xcalloc(tmp->thash_size,
284 			    sizeof (unsigned int));
285 			p->next = tmp;
286 			p = tmp;
287 			break;
288 		}
289 	}
290 	cur_catalog = p;
291 	cur_mo = p->fname;
292 }
293 
294 
295 void
296 handle_comment(char *comment)
297 {
298 	char	*p;
299 
300 	p = comment;
301 
302 	if (*p != ',') {
303 		/*
304 		 * This comment is just informative only.
305 		 */
306 		free(comment);
307 		return;
308 	}
309 	/*
310 	 * Checks "fuzzy", "c-format", and "no-c-format"
311 	 */
312 	p++;
313 	if (strstr(p, "fuzzy") != NULL) {
314 		next_entry_is_fuzzy = 1;
315 	}
316 	if (strstr(p, "no-c-format") != NULL) {
317 		next_entry_is_c_format = 0;
318 	} else if (strstr(p, "c-format") != NULL) {
319 		next_entry_is_c_format = 1;
320 	}
321 
322 	free(comment);
323 }
324 
325 void
326 handle_message(struct entry *id, struct entry *str)
327 {
328 	char	*charset, *nplurals, *tmp, *p;
329 	struct messages	*msg, *dupmsg;
330 	size_t	len;
331 	unsigned int	hash_val;
332 	unsigned int	nmsg, n, thash_idx;
333 
334 	if (cur_mo == NULL) {
335 		/*
336 		 * output file hasn't been specified, nor
337 		 * no domain directive found
338 		 */
339 		char	*default_domain;
340 
341 		default_domain = strict_flag ? DEFAULT_DOMAIN_MO :
342 		    DEFAULT_DOMAIN;
343 		catalog_init(default_domain);
344 	}
345 
346 	/*
347 	 * cur_catalog should be valid, at this point
348 	 */
349 
350 	hash_val = hashpjw(id->str);
351 	dupmsg = search_msg(cur_catalog, id->str, hash_val);
352 
353 	if (dupmsg) {
354 		if ((dupmsg->str_len == str->len) &&
355 		    (memcmp(dupmsg->str, str->str, str->len) == 0)) {
356 			/* totally same entry */
357 			if (verbose_flag) {
358 				warning(gettext(WARN_DUP_ENTRIES),
359 				    dupmsg->num, po_names[dupmsg->po],
360 				    id->num, cur_po);
361 			}
362 			free(id->str);
363 			if (id->pos)
364 				free(id->pos);
365 			free(str->str);
366 			if (str->pos)
367 				free(str->pos);
368 			return;
369 		}
370 		/* duplicate msgid */
371 		if (verbose_flag) {
372 			diag(gettext(ERR_DUP_ENTRIES),
373 			    dupmsg->num, po_names[dupmsg->po],
374 			    id->num, cur_po);
375 			po_error++;
376 		}
377 		/* ignore this etnry */
378 		free(id->str);
379 		if (id->pos)
380 			free(id->pos);
381 		free(str->str);
382 		if (str->pos)
383 			free(str->pos);
384 		return;
385 	}
386 
387 	if (next_entry_is_fuzzy) {
388 		/* fuzzy entry */
389 		cur_catalog->fnum++;
390 		if (!fuzzy_flag) {
391 			/* ignore this entry */
392 			free(id->str);
393 			if (id->pos)
394 				free(id->pos);
395 			free(str->str);
396 			if (str->pos)
397 				free(str->pos);
398 			return;
399 		}
400 	}
401 
402 	if (str->len == str->no) {
403 		/* this entry is not translated */
404 		cur_catalog->unum++;
405 		free(id->str);
406 		if (id->pos)
407 			free(id->pos);
408 		free(str->str);
409 		if (str->pos)
410 			free(str->pos);
411 		return;
412 	}
413 
414 	/* Checks if this is the header entry */
415 	if ((id->no == 1) && (id->len == 1)) {
416 		/*
417 		 * Header entry
418 		 */
419 		cur_catalog->header++;
420 
421 		/*
422 		 * Need to extract the charset information
423 		 */
424 		charset = strstr(str->str, CHARSET_STR);
425 		if (charset == NULL) {
426 			/* no charset information */
427 			warning(gettext(WARN_NOCHARSET),
428 			    id->num, cur_po, str->num);
429 			conv_init(NULL);
430 		} else {
431 			charset += CHARSET_LEN;
432 			p = strpbrk(charset, " \t\n");
433 			if (p != NULL) {
434 				/* p points to a space, tab or new line char */
435 				len = p - charset;
436 			} else {
437 				/* not found */
438 				len = strlen(charset);
439 			}
440 			tmp = Xmalloc(len + 1);
441 			(void) memcpy(tmp, charset, len);
442 			*(tmp + len) = '\0';
443 			charset = tmp;
444 			conv_init(charset);
445 			free(charset);
446 		}
447 		nplurals = strstr(str->str, NPLURALS_STR);
448 		if (nplurals == NULL) {
449 			cur_catalog->nplurals = 0;
450 		} else {
451 			unsigned int	num;
452 			nplurals += NPLURALS_LEN;
453 			p = nplurals;
454 			num = 0;
455 			while (isdigit((unsigned char)*p)) {
456 				num = num * 10 + *p++ - '0';
457 			}
458 			cur_catalog->nplurals = num;
459 		}
460 	}
461 
462 	if (verbose_flag)
463 		check_format(id, str, next_entry_is_c_format);
464 
465 	if (id->pos)
466 		free(id->pos);
467 	if (str->pos)
468 		free(str->pos);
469 
470 	msg = cur_catalog->msg;
471 	nmsg = cur_catalog->nmsg;
472 
473 	msg[nmsg].po = cur_po_index;
474 	msg[nmsg].num = id->num;
475 	msg[nmsg].id = id->str;
476 	msg[nmsg].id_len = id->len;
477 	msg[nmsg].str = str->str;
478 	msg[nmsg].str_len = str->len;
479 	msg[nmsg].hash = hash_val;
480 
481 	thash_idx = get_hash_index(cur_catalog->thash,
482 	    hash_val, cur_catalog->thash_size);
483 	cur_catalog->thash[thash_idx] = nmsg + 1;
484 	cur_catalog->nmsg++;
485 
486 	if (cur_catalog->nmsg >= cur_catalog->msg_size) {
487 		/* no vacancy in message array */
488 		cur_catalog->msg_size += DEF_MSG_NUM;
489 		cur_catalog->msg = Xrealloc(cur_catalog->msg,
490 		    cur_catalog->msg_size * sizeof (struct messages));
491 
492 		cur_catalog->thash_size =
493 			find_prime(cur_catalog->msg_size);
494 		free(cur_catalog->thash);
495 		cur_catalog->thash = Xcalloc(cur_catalog->thash_size,
496 		    sizeof (unsigned int));
497 
498 		for (n = 0; n < cur_catalog->nmsg; n++) {
499 			thash_idx = get_hash_index(cur_catalog->thash,
500 			    cur_catalog->msg[n].hash,
501 			    cur_catalog->thash_size);
502 			cur_catalog->thash[thash_idx] = n + 1;
503 		}
504 	}
505 }
506 
507 void
508 po_init(const char *file)
509 {
510 	char	*filename;
511 
512 	if (!inputdir) {
513 		filename = Xstrdup(file);
514 	} else {
515 		size_t	dirlen, filelen, len;
516 
517 		dirlen = strlen(inputdir);
518 		filelen = strlen(file);
519 		len = dirlen + 1 + filelen + 1;
520 		filename = Xmalloc(len);
521 		(void) memcpy(filename, inputdir, dirlen);
522 		*(filename + dirlen) = '/';
523 		(void) memcpy(filename + dirlen + 1, file, filelen);
524 		*(filename + dirlen + 1 + filelen) = '\0';
525 	}
526 
527 	fp = fopen(filename, "r");
528 	if (fp == NULL) {
529 		error(gettext(ERR_OPEN_FAILED), filename);
530 		/* NOTREACHED */
531 	}
532 
533 	po_names[cur_po_index] = filename;
534 	cur_line = 1;
535 	cd = (iconv_t)-1;
536 	if (!outfile)
537 		cur_mo = NULL;
538 }
539 
540 void
541 po_fini(void)
542 {
543 	cur_po_index++;
544 	(void) fclose(fp);
545 	if (cd != (iconv_t)-1)
546 		(void) iconv_close(cd);
547 }
548