xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_handle.c (revision 5c43f0bd385a568d23843a2fa79774668657d147)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include "gnu_msgfmt.h"
27 
/*
 * Per-entry flags: raised by handle_comment(), consulted by
 * handle_message(), and reset by clear_state().
 */
static int	next_entry_is_fuzzy = 0;
static int	next_entry_is_c_format = 0;

/*
 * Catalog selected by the latest catalog_init() call and its file name.
 * po_init() resets cur_mo to NULL when no output file was given.
 */
static struct catalog	*cur_catalog = NULL;
static char	*cur_mo = NULL;

/* Stream of the PO file being read; opened in po_init(), closed in po_fini() */
FILE	*fp;
/* Conversion descriptor set up by conv_init(); (iconv_t)-1 means "none" */
iconv_t	cd = (iconv_t)-1;
/* First element of the singly linked list built by catalog_init() */
struct catalog	*catalog_head = NULL;
/* Slot in po_names[] used for the current PO file; bumped by po_fini() */
int	cur_po_index = 0;
37 
/*
 * Returns the first position at or after 'cur' that is neither a space
 * nor a tab, never going past 'end'.
 */
static char *
alias_skip_blanks(char *cur, const char *end)
{
	while (cur < end && (*cur == ' ' || *cur == '\t'))
		cur++;
	return (cur);
}

/*
 * Returns the first position at or after 'cur' holding a space, a tab or
 * a newline, or 'end' if the buffer runs out first.
 */
static char *
alias_skip_token(char *cur, const char *end)
{
	while (cur < end && *cur != ' ' && *cur != '\t' && *cur != '\n')
		cur++;
	return (cur);
}

/*
 * Consumes bytes up to and including the next newline and returns the
 * position just after it, or 'end' if no newline is left.
 */
static char *
alias_next_line(char *cur, const char *end)
{
	while (cur < end) {
		if (*cur++ == '\n')
			break;
	}
	return (cur);
}

/*
 * Looks up 'variant' in an in-memory alias table.
 *
 * The table is the 'size' bytes starting at *paddr.  A line whose first
 * byte is '#' is a comment.  Every other line is expected to hold two
 * blank-separated words; the first one is compared with 'variant'
 * (exactly, then ignoring case) and the second one is the result.
 *
 * On success *paddr is moved to the first byte of the second word and its
 * length is returned; the word is NOT NUL-terminated.  When nothing
 * matches, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char	*cur = *paddr;
	char	*end = cur + size;
	char	*tok;
	size_t	want = strlen(variant);
	size_t	tok_len;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* first word: the alias name */
		tok = alias_skip_blanks(cur, end);
		if (tok >= end)
			break;
		cur = alias_skip_token(tok, end);
		if (cur >= end) {
			/* word runs into end of buffer: invalid entry */
			break;
		}
		if (*cur == '\n') {
			/* only one word on this line: invalid entry */
			cur++;
			continue;
		}

		tok_len = (size_t)(cur - tok);
		if (tok_len != want ||
		    !(strncmp(tok, variant, want) == 0 ||
		    strncasecmp(tok, variant, want) == 0)) {
			/* some other alias; drop the rest of the line */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* second word: the name the alias stands for */
		tok = alias_skip_blanks(cur, end);
		if (tok >= end)
			break;
		cur = alias_skip_token(tok, end);
		tok_len = (size_t)(cur - tok);
		if (tok_len == 0) {
			/* nothing but blanks followed the alias */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = tok;
		return (tok_len);
	}

	return (0);
}
115 
116 /*
117  * Checks if the specified charset is equivalent to UTF-8.
118  * If it's equivalent to UTF-8, returns 1; Otherwise, returns 0.
119  */
120 static int
121 check_utf8(const char *charset)
122 {
123 	int	fd;
124 	struct stat64	statbuf;
125 	caddr_t	addr;
126 	size_t	buflen, charset_len, utf8_len;
127 	char	*c_charset, *c_utf8, *p;
128 
129 	if (strcmp(charset, DEST_CHARSET) == 0)
130 		return (1);
131 
132 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
133 	if (fd == -1) {
134 		/* no alias file found */
135 		return (0);
136 	}
137 	if (fstat64(fd, &statbuf) == -1) {
138 		(void) close(fd);
139 		return (0);
140 	}
141 	buflen = (size_t)statbuf.st_size;
142 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
143 	(void) close(fd);
144 	if (addr == MAP_FAILED) {
145 		warning("mmap() for %s failed.", _ENCODING_ALIAS_PATH);
146 		return (0);
147 	}
148 	p = (char *)addr;
149 	charset_len = search_alias(&p, buflen, charset);
150 	if (charset_len) {
151 		c_charset = alloca(charset_len + 1);
152 		(void) memcpy(c_charset, p, charset_len);
153 		c_charset[charset_len] = '\0';
154 	} else {
155 		c_charset = (char *)charset;
156 	}
157 	p = (char *)addr;
158 	utf8_len = search_alias(&p, buflen, DEST_CHARSET);
159 	if (utf8_len) {
160 		c_utf8 = alloca(utf8_len + 1);
161 		(void) memcpy(c_utf8, p, utf8_len);
162 		c_utf8[utf8_len] = '\0';
163 	} else {
164 		c_utf8 = DEST_CHARSET;
165 	}
166 	(void) munmap(addr, buflen);
167 	if (charset_len == 0 && utf8_len == 0) {
168 		/*
169 		 * Entry for neither charset nor utf8 found
170 		 */
171 		return (0);
172 	}
173 
174 	if (strcmp(c_charset, c_utf8) == 0)
175 		return (1);
176 	else
177 		return (0);
178 }
179 
180 static void
181 conv_init(const char *charset)
182 {
183 	if (charset == NULL) {
184 		/*
185 		 * No conversion
186 		 */
187 		cd = (iconv_t)-1;
188 		return;
189 	}
190 	if (check_utf8(charset)) {
191 		/*
192 		 * Charset is UTF-8.
193 		 * No conversion is required.
194 		 */
195 		cd = (iconv_t)-1;
196 		return;
197 	}
198 	cd = iconv_open(DEST_CHARSET, charset);
199 	if (cd == (iconv_t)-1) {
200 		/*
201 		 * No such a conversion
202 		 */
203 		warning(gettext(WARN_NOCONV),
204 			cur_line, cur_po, charset, DEST_CHARSET);
205 		return;
206 	}
207 }
208 
209 void
210 clear_state(void)
211 {
212 	next_entry_is_fuzzy = 0;
213 	next_entry_is_c_format = 0;
214 }
215 
216 void
217 handle_domain(char *domainname)
218 {
219 	if (outfile) {
220 		/*
221 		 * outfile has been specified by -o option
222 		 * ignore all domain directives
223 		 */
224 		if (verbose_flag) {
225 			diag(gettext(DIAG_IGNORE_DOMAIN),
226 				cur_line, cur_po, domainname);
227 		}
228 		free(domainname);
229 		return;
230 	}
231 
232 	if (strict_flag) {
233 		/*
234 		 * add ".mo" to the domain
235 		 */
236 		char	*tmp;
237 		tmp = Xrealloc(domainname, strlen(domainname) + 3 + 1);
238 		(void) strcat(tmp, ".mo");
239 		domainname = tmp;
240 	}
241 	catalog_init(domainname);
242 	free(domainname);
243 }
244 
245 void
246 catalog_init(const char *filename)
247 {
248 	struct catalog	*p;
249 
250 	if (!catalog_head) {
251 		p = Xcalloc(1, sizeof (struct catalog));
252 		p->fname = Xstrdup(filename);
253 		p->msg_size = DEF_MSG_NUM;
254 		p->nmsg = 0;
255 		p->msg = Xcalloc(p->msg_size, sizeof (struct messages));
256 		p->thash_size = find_prime(DEF_MSG_NUM);
257 		p->thash = Xcalloc(p->thash_size, sizeof (unsigned int));
258 		catalog_head = p;
259 	} else {
260 		p = catalog_head;
261 		for (; ; ) {
262 			struct catalog	*tmp;
263 			if (strcmp(p->fname, filename) == 0) {
264 				/* already registered */
265 				break;
266 			}
267 			if (p->next) {
268 				p = p->next;
269 				continue;
270 			}
271 			/*
272 			 * this domain hasn't been registered
273 			 */
274 			tmp = Xcalloc(1, sizeof (struct catalog));
275 			tmp->fname = Xstrdup(filename);
276 			tmp->msg_size = DEF_MSG_NUM;
277 			tmp->nmsg = 0;
278 			tmp->msg = Xcalloc(tmp->msg_size,
279 			    sizeof (struct messages));
280 			tmp->thash_size = find_prime(DEF_MSG_NUM);
281 			tmp->thash = Xcalloc(tmp->thash_size,
282 			    sizeof (unsigned int));
283 			p->next = tmp;
284 			p = tmp;
285 			break;
286 		}
287 	}
288 	cur_catalog = p;
289 	cur_mo = p->fname;
290 }
291 
292 
293 void
294 handle_comment(char *comment)
295 {
296 	char	*p;
297 
298 	p = comment;
299 
300 	if (*p != ',') {
301 		/*
302 		 * This comment is just informative only.
303 		 */
304 		free(comment);
305 		return;
306 	}
307 	/*
308 	 * Checks "fuzzy", "c-format", and "no-c-format"
309 	 */
310 	p++;
311 	if (strstr(p, "fuzzy") != NULL) {
312 		next_entry_is_fuzzy = 1;
313 	}
314 	if (strstr(p, "no-c-format") != NULL) {
315 		next_entry_is_c_format = 0;
316 	} else if (strstr(p, "c-format") != NULL) {
317 		next_entry_is_c_format = 1;
318 	}
319 
320 	free(comment);
321 }
322 
323 void
324 handle_message(struct entry *id, struct entry *str)
325 {
326 	char	*charset, *nplurals, *tmp, *p;
327 	struct messages	*msg, *dupmsg;
328 	size_t	len;
329 	unsigned int	hash_val;
330 	unsigned int	nmsg, n, thash_idx;
331 
332 	if (cur_mo == NULL) {
333 		/*
334 		 * output file hasn't been specified, nor
335 		 * no domain directive found
336 		 */
337 		char	*default_domain;
338 
339 		default_domain = strict_flag ? DEFAULT_DOMAIN_MO :
340 		    DEFAULT_DOMAIN;
341 		catalog_init(default_domain);
342 	}
343 
344 	/*
345 	 * cur_catalog should be valid, at this point
346 	 */
347 
348 	hash_val = hashpjw(id->str);
349 	dupmsg = search_msg(cur_catalog, id->str, hash_val);
350 
351 	if (dupmsg) {
352 		if ((dupmsg->str_len == str->len) &&
353 		    (memcmp(dupmsg->str, str->str, str->len) == 0)) {
354 			/* totally same entry */
355 			if (verbose_flag) {
356 				warning(gettext(WARN_DUP_ENTRIES),
357 				    dupmsg->num, po_names[dupmsg->po],
358 				    id->num, cur_po);
359 			}
360 			free(id->str);
361 			if (id->pos)
362 				free(id->pos);
363 			free(str->str);
364 			if (str->pos)
365 				free(str->pos);
366 			return;
367 		}
368 		/* duplicate msgid */
369 		if (verbose_flag) {
370 			diag(gettext(ERR_DUP_ENTRIES),
371 			    dupmsg->num, po_names[dupmsg->po],
372 			    id->num, cur_po);
373 			po_error++;
374 		}
375 		/* ignore this etnry */
376 		free(id->str);
377 		if (id->pos)
378 			free(id->pos);
379 		free(str->str);
380 		if (str->pos)
381 			free(str->pos);
382 		return;
383 	}
384 
385 	if (next_entry_is_fuzzy) {
386 		/* fuzzy entry */
387 		cur_catalog->fnum++;
388 		if (!fuzzy_flag) {
389 			/* ignore this entry */
390 			free(id->str);
391 			if (id->pos)
392 				free(id->pos);
393 			free(str->str);
394 			if (str->pos)
395 				free(str->pos);
396 			return;
397 		}
398 	}
399 
400 	if (str->len == str->no) {
401 		/* this entry is not translated */
402 		cur_catalog->unum++;
403 		free(id->str);
404 		if (id->pos)
405 			free(id->pos);
406 		free(str->str);
407 		if (str->pos)
408 			free(str->pos);
409 		return;
410 	}
411 
412 	/* Checks if this is the header entry */
413 	if ((id->no == 1) && (id->len == 1)) {
414 		/*
415 		 * Header entry
416 		 */
417 		cur_catalog->header++;
418 
419 		/*
420 		 * Need to extract the charset information
421 		 */
422 		charset = strstr(str->str, CHARSET_STR);
423 		if (charset == NULL) {
424 			/* no charset information */
425 			warning(gettext(WARN_NOCHARSET),
426 			    id->num, cur_po, str->num);
427 			conv_init(NULL);
428 		} else {
429 			charset += CHARSET_LEN;
430 			p = strpbrk(charset, " \t\n");
431 			if (p != NULL) {
432 				/* p points to a space, tab or new line char */
433 				len = p - charset;
434 			} else {
435 				/* not found */
436 				len = strlen(charset);
437 			}
438 			tmp = Xmalloc(len + 1);
439 			(void) memcpy(tmp, charset, len);
440 			*(tmp + len) = '\0';
441 			charset = tmp;
442 			conv_init(charset);
443 			free(charset);
444 		}
445 		nplurals = strstr(str->str, NPLURALS_STR);
446 		if (nplurals == NULL) {
447 			cur_catalog->nplurals = 0;
448 		} else {
449 			unsigned int	num;
450 			nplurals += NPLURALS_LEN;
451 			p = nplurals;
452 			num = 0;
453 			while (isdigit((unsigned char)*p)) {
454 				num = num * 10 + *p++ - '0';
455 			}
456 			cur_catalog->nplurals = num;
457 		}
458 	}
459 
460 	if (verbose_flag)
461 		check_format(id, str, next_entry_is_c_format);
462 
463 	if (id->pos)
464 		free(id->pos);
465 	if (str->pos)
466 		free(str->pos);
467 
468 	msg = cur_catalog->msg;
469 	nmsg = cur_catalog->nmsg;
470 
471 	msg[nmsg].po = cur_po_index;
472 	msg[nmsg].num = id->num;
473 	msg[nmsg].id = id->str;
474 	msg[nmsg].id_len = id->len;
475 	msg[nmsg].str = str->str;
476 	msg[nmsg].str_len = str->len;
477 	msg[nmsg].hash = hash_val;
478 
479 	thash_idx = get_hash_index(cur_catalog->thash,
480 	    hash_val, cur_catalog->thash_size);
481 	cur_catalog->thash[thash_idx] = nmsg + 1;
482 	cur_catalog->nmsg++;
483 
484 	if (cur_catalog->nmsg >= cur_catalog->msg_size) {
485 		/* no vacancy in message array */
486 		cur_catalog->msg_size += DEF_MSG_NUM;
487 		cur_catalog->msg = Xrealloc(cur_catalog->msg,
488 		    cur_catalog->msg_size * sizeof (struct messages));
489 
490 		cur_catalog->thash_size =
491 			find_prime(cur_catalog->msg_size);
492 		free(cur_catalog->thash);
493 		cur_catalog->thash = Xcalloc(cur_catalog->thash_size,
494 		    sizeof (unsigned int));
495 
496 		for (n = 0; n < cur_catalog->nmsg; n++) {
497 			thash_idx = get_hash_index(cur_catalog->thash,
498 			    cur_catalog->msg[n].hash,
499 			    cur_catalog->thash_size);
500 			cur_catalog->thash[thash_idx] = n + 1;
501 		}
502 	}
503 }
504 
505 void
506 po_init(const char *file)
507 {
508 	char	*filename;
509 
510 	if (!inputdir) {
511 		filename = Xstrdup(file);
512 	} else {
513 		size_t	dirlen, filelen, len;
514 
515 		dirlen = strlen(inputdir);
516 		filelen = strlen(file);
517 		len = dirlen + 1 + filelen + 1;
518 		filename = Xmalloc(len);
519 		(void) memcpy(filename, inputdir, dirlen);
520 		*(filename + dirlen) = '/';
521 		(void) memcpy(filename + dirlen + 1, file, filelen);
522 		*(filename + dirlen + 1 + filelen) = '\0';
523 	}
524 
525 	fp = fopen(filename, "r");
526 	if (fp == NULL) {
527 		error(gettext(ERR_OPEN_FAILED), filename);
528 		/* NOTREACHED */
529 	}
530 
531 	po_names[cur_po_index] = filename;
532 	cur_line = 1;
533 	cd = (iconv_t)-1;
534 	if (!outfile)
535 		cur_mo = NULL;
536 }
537 
538 void
539 po_fini(void)
540 {
541 	cur_po_index++;
542 	(void) fclose(fp);
543 	if (cd != (iconv_t)-1)
544 		(void) iconv_close(cd);
545 }
546