xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_check.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "gnu_msgfmt.h"
30 
/*
 * Length-modifier bits stored in the second byte of each argument slot
 * produced by extract_format().
 */
#define	OPT_L	0x01	/* 'L' */
#define	OPT_l	0x02	/* 'l', 'z', 't', and implied by %C / %S */
#define	OPT_ll	0x04	/* "ll" (replaces OPT_l) */
#define	OPT_w	0x08	/* 'w' */
#define	OPT_h	0x10	/* 'h' */
#define	OPT_hh	0x20	/* second 'h' (set in addition to OPT_h) */
#define	OPT_j	0x40	/* 'j' */

/*
 * Consume the run of decimal digits at *fmtp and return its value,
 * leaving *fmtp on the first non-digit.  Returns 0 when there are no
 * digits.  The value saturates at the largest int instead of
 * overflowing, so an absurdly long argument number simply ends up out
 * of range for the caller's bounds check.
 */
static int
scan_decimal(const unsigned char **fmtp)
{
	/* Largest int value, spelled without needing <limits.h>. */
	const int	cap = (int)(~0U >> 1);
	const unsigned char	*p = *fmtp;
	int	val = 0;

	while (isdigit(*p)) {
		int	d = *p++ - '0';

		if (val > (cap - d) / 10)
			val = cap;
		else
			val = val * 10 + d;
	}
	*fmtp = p;
	return (val);
}

/*
 * Record that argument number idx (zero based) is consumed by conv.
 * Fails (-1) when idx is negative, when its two-byte slot does not fit
 * in the sz bytes of norm, or when the slot is already taken by a
 * different conversion.  On success *maxp is raised to idx + 1 if
 * needed and 0 is returned.
 */
static int
claim_slot(char *norm, size_t sz, int idx, char conv, int *maxp)
{
	size_t	slot;

	/* Same condition as "idx * 2 + 1 >= sz", without int overflow. */
	if (idx < 0 || (size_t)idx >= sz / 2)
		return (-1);

	slot = (size_t)idx * 2;
	if (norm[slot] != '\0' && norm[slot] != conv)
		return (-1);

	if (idx >= *maxp)
		*maxp = idx + 1;
	norm[slot] = conv;
	return (0);
}

/*
 * Reduce the printf-style format string sfmt to a per-argument summary.
 *
 * norm is a buffer of sz bytes that must be zero-filled on entry (a
 * zero conversion byte means "slot unused").  For every argument i the
 * format consumes, norm[2 * i] receives the conversion character
 * ('*' for a width/precision argument; %C and %S are stored as 'c' and
 * 's' with OPT_l set) and norm[2 * i + 1] receives the OPT_* length
 * modifier bits.  Both implicit ordering and the "%n$" / "*n$"
 * numbered forms are understood.
 *
 * Returns the number of arguments used, or -1 when the format is
 * inconsistent: an argument number out of range, one argument used
 * with two different conversions, a second '.', or a gap in the
 * argument numbering.  Unknown conversion characters are skipped
 * without consuming an argument.
 */
static int
extract_format(char *norm, const char *sfmt, size_t sz)
{
	const unsigned char	*fmt = (const unsigned char *)sfmt;
	unsigned char	c;
	int	t, arg;
	int	dotseen;
	char	flag, conv;
	int	lastarg = -1;
	int	prevarg;
	int	max = 0;
	int	lflag;

	for (; *fmt; fmt++) {
		if (*fmt != '%')
			continue;
		if (*++fmt == '%')
			continue;		/* literal "%%" */
		if (!*fmt)
			break;

		/* Assume the next sequential argument until "n$" says not. */
		prevarg = lastarg;
		arg = ++lastarg;

		t = scan_decimal(&fmt);
		if (*fmt == '$') {
			lastarg = arg = t - 1;
			fmt++;
		}

		if (!*fmt)
			goto end;

		dotseen = 0;
		flag = 0;
		lflag = 0;
again:
		/* Skip flags */
		for (;;) {
			c = *fmt;
			if (c != '\'' && c != '+' && c != '-' &&
			    c != ' ' && c != '#' && c != '0')
				break;
			fmt++;
		}

		/* Skip a literal width or precision. */
		while (isdigit(*fmt))
			fmt++;

		if (*fmt == '*') {
			if (isdigit(fmt[1])) {
				fmt++;
				t = scan_decimal(&fmt);
				if (*fmt == '$') {
					/*
					 * %*4$
					 */
					if (claim_slot(norm, sz, t - 1, '*',
					    &max) != 0) {
						/* error in format */
						return (-1);
					}
					fmt++;
				}
				/*
				 * If digits follow a '*', it is
				 * not loaded as an argument, the
				 * digits are used instead.  Only a
				 * real '$' is stepped over, so the
				 * character after the digits (the
				 * conversion, or the terminating
				 * NUL) is left for the code below.
				 */
			} else if (fmt[1] == '$') {
				/* "*$": no argument is recorded. */
				fmt += 2;
			} else {
				/*
				 * %*
				 * The '*' takes the current argument and
				 * the conversion moves to the next one.
				 */
				prevarg = arg;
				if (claim_slot(norm, sz, arg, '*',
				    &max) != 0) {
					/* error in format */
					return (-1);
				}
				lastarg = ++arg;
				fmt++;
			}
		}

		if ((*fmt == '.') || (*fmt == '*')) {
			if (dotseen)
				return (-1);
			dotseen = 1;
			fmt++;
			goto again;
		}

		if (!*fmt)
			goto end;

		/* Collect length modifiers. */
		while (*fmt) {
			switch (*fmt) {
			case 'l':
				if (!(flag & OPT_ll)) {
					if (lflag) {
						flag &= ~OPT_l;
						flag |= OPT_ll;
					} else {
						flag |= OPT_l;
					}
				}
				lflag++;
				break;
			case 'L':
				flag |= OPT_L;
				break;
			case 'w':
				flag |= OPT_w;
				break;
			case 'h':
				if (flag & (OPT_h|OPT_hh))
					flag |= OPT_hh;
				else
					flag |= OPT_h;
				break;
			case 'j':
				flag |= OPT_j;
				break;
			case 'z':
			case 't':
				if (!(flag & OPT_ll)) {
					flag |= OPT_l;
				}
				break;
			case '\'':
			case '+':
			case '-':
			case ' ':
			case '#':
			case '.':
			case '*':
				/* flags or width mixed in with modifiers */
				goto again;
			default:
				if (isdigit(*fmt))
					goto again;
				else
					goto done;
			}
			fmt++;
		}
done:
		if (!*fmt)
			goto end;

		switch (c = *fmt) {
		case 'C':
			flag |= OPT_l;
			conv = 'c';
			break;
		case 'S':
			flag |= OPT_l;
			conv = 's';
			break;
		case 'd': case 'i': case 'o': case 'u':
		case 'x': case 'X': case 'c': case 's':
		case 'e': case 'E': case 'f': case 'F':
		case 'a': case 'A': case 'g': case 'G':
		case 'p': case 'n':
			conv = (char)c;
			break;
		default:
			/* Not a conversion: give the argument back. */
			lastarg = prevarg;
			continue;
		}

		if (claim_slot(norm, sz, arg, conv, &max) != 0)
			return (-1);
		norm[(size_t)arg * 2 + 1] = flag;
	}

end:
	/* Every argument below the highest one used must be used too. */
	for (arg = 0; arg < max; arg++) {
		if (norm[arg * 2] == '\0')
			return (-1);
	}

	return (max);
}
276 
277 
278 void
279 check_format(struct entry *id, struct entry *str, int is_c_format)
280 {
281 	int	i, n;
282 	int	id_b_newline, id_e_newline;
283 	int	plural_b_newline, plural_e_newline;
284 	int	str_b_newline, str_e_newline;
285 	int	id_fmt, plural_fmt, str_fmt;
286 	int	*pstr_fmt;
287 	char	*msgid, *plural, *msgstr;
288 	char	*id_norm, *plural_norm, *str_norm;
289 	char	**pstr_norm;
290 	size_t	id_len, id_num;
291 	size_t	plural_off, plural_len, plural_num;
292 	size_t	str_len, str_num;
293 	size_t	osz, nsz;
294 	struct loc	*p;
295 
296 	if (id->len == 1) {
297 		/*
298 		 * null string: header entry
299 		 * no check is performed
300 		 */
301 		return;
302 	}
303 
304 	msgid = id->str;
305 	id_num = id->num;
306 	msgstr = str->str;
307 	if (id->no > 1) {
308 		/* plural */
309 		id_len = id->pos[0].len;
310 		plural_off = id->pos[1].off;
311 		plural_len = id->pos[1].len;
312 		plural_num = id->pos[1].num;
313 		plural = msgid + plural_off;
314 	} else {
315 		/* no plural form */
316 		id_len = id->len;
317 		str_len = str->len;
318 		str_num = str->num;
319 		plural = NULL;
320 	}
321 
322 	/*
323 	 * First checking the newline
324 	 */
325 
326 	if (!plural) {
327 		/* no plural form */
328 		id_b_newline = (msgid[0] == '\n');
329 		id_e_newline = (msgid[id_len - 1 - 1] == '\n');
330 
331 		str_b_newline = (msgstr[0] == '\n');
332 		str_e_newline = (msgstr[str_len - 1 - 1] == '\n');
333 		if (id_b_newline && !str_b_newline) {
334 			diag(gettext(ERR_BEGIN_NEWLINE_1),
335 				id_num, str_num, cur_po);
336 			po_error++;
337 		} else if (!id_b_newline && str_b_newline) {
338 			diag(gettext(ERR_BEGIN_NEWLINE_2),
339 				id_num, str_num, cur_po);
340 			po_error++;
341 		}
342 		if (id_e_newline && !str_e_newline) {
343 			diag(gettext(ERR_END_NEWLINE_1),
344 				id_num, str_num, cur_po);
345 			po_error++;
346 		} else if (!id_e_newline && str_e_newline) {
347 			diag(gettext(ERR_END_NEWLINE_2),
348 				id_num, str_num, cur_po);
349 			po_error++;
350 		}
351 	} else {
352 		/* plural form */
353 		id_b_newline = (msgid[0] == '\n');
354 		id_e_newline = (msgid[id_len - 1 - 1] == '\n');
355 
356 		plural_b_newline = (plural[0] == '\n');
357 		plural_e_newline = (plural[plural_len - 1 -1 ] == '\n');
358 
359 		/* between msgid and msgid_plural */
360 		if (id_b_newline && !plural_b_newline) {
361 			diag(gettext(ERR_BEGIN_NEWLINE_3),
362 				id_num, plural_num, cur_po);
363 			po_error++;
364 		} else if (!id_b_newline && plural_b_newline) {
365 			diag(gettext(ERR_BEGIN_NEWLINE_4),
366 				id_num, plural_num, cur_po);
367 			po_error++;
368 		}
369 		if (id_e_newline && !plural_e_newline) {
370 			diag(gettext(ERR_END_NEWLINE_3),
371 				id_num, plural_num, cur_po);
372 			po_error++;
373 		} else if (!id_e_newline && plural_e_newline) {
374 			diag(gettext(ERR_END_NEWLINE_4),
375 				id_num, plural_num, cur_po);
376 			po_error++;
377 		}
378 
379 		for (i = 0; i < str->no; i++) {
380 			p = str->pos + i;
381 			str_b_newline = (msgstr[p->off] == '\n');
382 			str_e_newline =
383 				(msgstr[p->off + p->len - 1 - 1] == '\n');
384 
385 			if (id_b_newline && !str_b_newline) {
386 				diag(gettext(ERR_BEGIN_NEWLINE_5),
387 					id_num, p->num, cur_po, i);
388 				po_error++;
389 			} else if (!id_b_newline && str_b_newline) {
390 				diag(gettext(ERR_BEGIN_NEWLINE_6),
391 					id_num, p->num, cur_po, i);
392 				po_error++;
393 			}
394 
395 			if (id_e_newline && !str_e_newline) {
396 				diag(gettext(ERR_END_NEWLINE_5),
397 					id_num, p->num, cur_po, i);
398 				po_error++;
399 			} else if (!id_e_newline && str_e_newline) {
400 				diag(gettext(ERR_END_NEWLINE_6),
401 					id_num, p->num, cur_po, i);
402 				po_error++;
403 			}
404 		}
405 	}
406 
407 	/*
408 	 * if c-format is not specified, no printf-format check
409 	 * is performed.
410 	 */
411 	if (!is_c_format) {
412 		return;
413 	}
414 
415 	osz = id_len * 2;
416 	id_norm = (char *)Xcalloc(1, osz);
417 	id_fmt = extract_format(id_norm, msgid, osz);
418 	if (id_fmt == -1) {
419 		diag(gettext(ERR_INVALID_FMT), id_num, cur_po);
420 		po_error++;
421 	}
422 
423 	if (!plural) {
424 		/* no plural */
425 
426 		nsz = str_len * 2;
427 		str_norm = (char *)Xcalloc(1, nsz);
428 		str_fmt = extract_format(str_norm, msgstr, nsz);
429 		if (str_fmt == -1) {
430 			diag(gettext(ERR_INVALID_FMT), str_num, cur_po);
431 			po_error++;
432 		}
433 
434 		if (id_fmt != str_fmt) {
435 			diag(gettext(ERR_INCMP_FMT),
436 				id_num, str_num, cur_po);
437 			diag(gettext(ERR_INCMP_FMT_DIFF_1),
438 				id_fmt, str_fmt);
439 			po_error++;
440 		} else {
441 			for (n = 0; n < id_fmt; n++) {
442 				if ((id_norm[n * 2] !=
443 					str_norm[n * 2]) ||
444 					(id_norm[n * 2 + 1] !=
445 					str_norm[n * 2 + 1])) {
446 					diag(gettext(ERR_INCMP_FMT),
447 						id_num, str_num, cur_po);
448 					diag(gettext(ERR_INCMP_FMT_DIFF_2),
449 						n + 1);
450 					po_error++;
451 				}
452 			}
453 		}
454 		free(str_norm);
455 		free(id_norm);
456 
457 		return;
458 	}
459 
460 	/* plural */
461 	nsz = plural_len * 2;
462 	plural_norm = (char *)Xcalloc(1, nsz);
463 	plural_fmt = extract_format(plural_norm, plural, nsz);
464 	if (plural_fmt == -1) {
465 		diag(gettext(ERR_INVALID_FMT), plural_num, cur_po);
466 		po_error++;
467 	}
468 
469 	pstr_norm = (char **)Xcalloc(str->no, sizeof (char *));
470 	pstr_fmt = (int *)Xcalloc(str->no, sizeof (int));
471 	for (i = 0; i < str->no; i++) {
472 		p = str->pos + i;
473 		nsz = p->len * 2;
474 		pstr_norm[i] = (char *)Xcalloc(1, nsz);
475 		pstr_fmt[i] = extract_format(pstr_norm[i],
476 			msgstr + p->off, nsz);
477 		if (pstr_fmt[i] == -1) {
478 			diag(gettext(ERR_INVALID_FMT),
479 				p->num, cur_po);
480 			po_error++;
481 		}
482 	}
483 
484 	/* between msgid and msgid_plural */
485 	if (id_fmt != plural_fmt) {
486 		diag(gettext(ERR_INCMP_FMT),
487 			id_num, plural_num, cur_po);
488 		diag(gettext(ERR_INCMP_FMT_DIFF_1),
489 			id_fmt, plural_fmt);
490 		po_error++;
491 	} else {
492 		for (n = 0; n < id_fmt; n++) {
493 			if ((id_norm[n * 2] !=
494 				plural_norm[n * 2]) ||
495 				(id_norm[n * 2 + 1] !=
496 				plural_norm[n * 2 + 1])) {
497 				diag(gettext(ERR_INCMP_FMT),
498 					id_num, plural_num, cur_po);
499 				diag(gettext(ERR_INCMP_FMT_DIFF_2),
500 					n + 1);
501 				po_error++;
502 			}
503 		}
504 	}
505 	free(plural_norm);
506 
507 	/* between msgid and msgstr */
508 	for (i = 0; i < str->no; i++) {
509 		p = str->pos + i;
510 		if (id_fmt != pstr_fmt[i]) {
511 			diag(gettext(ERR_INCMP_FMT),
512 				id_num, p->num, cur_po);
513 			diag(gettext(ERR_INCMP_FMT_DIFF_1),
514 				id_fmt, pstr_fmt[i]);
515 			po_error++;
516 		} else {
517 			for (n = 0; n < id_fmt; n++) {
518 				if ((id_norm[n * 2] !=
519 					pstr_norm[i][n * 2]) ||
520 					(id_norm[n * 2 + 1] !=
521 					pstr_norm[i][n * 2 + 1])) {
522 					diag(gettext(ERR_INCMP_FMT),
523 						id_num, p->num, cur_po);
524 					diag(gettext(ERR_INCMP_FMT_DIFF_2),
525 						n + 1);
526 					po_error++;
527 				}
528 			}
529 		}
530 		free(pstr_norm[i]);
531 	}
532 	free(pstr_norm);
533 	free(pstr_fmt);
534 	free(id_norm);
535 }
536