xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_check.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
#include <limits.h>

#include "gnu_msgfmt.h"
28 
/*
 * Length-modifier bits recorded in the second byte of each argument
 * slot of the normalized format array built by extract_format().
 */
#define	OPT_L	0x01	/* 'L' */
#define	OPT_l	0x02	/* single 'l'; also set for 'z', 't', 'C' and 'S' */
#define	OPT_ll	0x04	/* "ll" */
#define	OPT_w	0x08	/* 'w' */
#define	OPT_h	0x10	/* 'h' */
#define	OPT_hh	0x20	/* second 'h' (OPT_h stays set as well) */
#define	OPT_j	0x40	/* 'j' */

/* Ceiling for numbers parsed out of a format; keeps "n + 1" in range. */
#define	FMT_NUM_MAX	(INT_MAX - 1)

/*
 * Parse the run of decimal digits starting at *pp and advance *pp past
 * it.  Returns 0 when there are no digits.  The value saturates at
 * FMT_NUM_MAX instead of overflowing, so an absurdly long digit string
 * simply yields an argument position that can never fit in the array.
 */
static int
fmt_parse_number(const unsigned char **pp)
{
	const unsigned char	*p = *pp;
	int	val = 0;

	while (isdigit(*p)) {
		int	digit = *p++ - '0';

		if (val > (FMT_NUM_MAX - digit) / 10)
			val = FMT_NUM_MAX;
		else
			val = val * 10 + digit;
	}
	*pp = p;
	return (val);
}

/*
 * Record that argument number idx (0-based) is consumed as 'kind'
 * (a conversion character, or '*' for a width/precision argument).
 * Each argument owns two bytes of norm, so idx is valid only when
 * idx * 2 + 1 < sz.  Returns -1 if idx is negative or out of range,
 * or if the slot was already claimed with a different kind; otherwise
 * stores kind, raises *maxp to cover idx, and returns 0.
 */
static int
fmt_record_slot(char *norm, size_t sz, int idx, char kind, int *maxp)
{
	size_t	pos;

	if (idx < 0 || (size_t)idx >= sz / 2)
		return (-1);

	pos = (size_t)idx * 2;
	if (norm[pos] != '\0' && norm[pos] != kind)
		return (-1);

	if (idx >= *maxp)
		*maxp = idx + 1;
	norm[pos] = kind;
	return (0);
}

/*
 * Scan the printf-style format string sfmt and describe the arguments
 * it consumes in norm, a caller-zeroed array of sz bytes.
 *
 * Argument i (0-based) is described by two bytes:
 *	norm[i * 2]	conversion character ('C' and 'S' are stored as
 *			'c' and 's' with OPT_l set), or '*' when the
 *			argument supplies a field width or precision
 *	norm[i * 2 + 1]	OPT_* length-modifier bits (left 0 for '*')
 *
 * Both implicit ordering ("%d") and explicit positions ("%2$d",
 * "%*3$d") are handled.  A '%' followed by an unrecognized conversion
 * character does not consume an argument.
 *
 * Returns the number of arguments described, or -1 if the format is
 * unusable: an argument position that is zero or does not fit in norm,
 * one argument used with two different conversions, a second '.' (or
 * a '*' in that position) in one specification, or an argument
 * position that is skipped.
 */
static int
extract_format(char *norm, const char *sfmt, size_t sz)
{
	const unsigned char	*fmt = (const unsigned char *)sfmt;
	unsigned char	c;
	int	t, arg;
	int	dotseen;
	char	flag, conv;
	int	lastarg = -1;
	int	prevarg;
	int	max = 0;
	int	lflag;

	for (; *fmt; fmt++) {
		if (*fmt != '%')
			continue;
		if (*++fmt == '%')
			continue;		/* literal "%%" */
		if (!*fmt)
			break;

		/* Assume the next implicit argument until "n$" says otherwise */
		prevarg = lastarg;
		arg = ++lastarg;

		t = fmt_parse_number(&fmt);
		if (*fmt == '$') {
			lastarg = arg = t - 1;
			fmt++;
		}

		if (!*fmt)
			goto end;

		dotseen = 0;
		flag = 0;
		lflag = 0;
again:
		/* Skip flags */
		while ((c = *fmt) == '\'' || c == '+' || c == '-' ||
		    c == ' ' || c == '#' || c == '0')
			fmt++;

		/* Skip a literal field width or precision */
		while (isdigit(*fmt))
			fmt++;

		if (*fmt == '*') {
			if (isdigit(*(fmt + 1))) {
				fmt++;
				t = fmt_parse_number(&fmt);
				if (*fmt == '$') {
					/*
					 * %*4$ : width/precision taken from
					 * an explicitly numbered argument.
					 */
					if (fmt_record_slot(norm, sz, t - 1,
					    '*', &max) == -1)
						return (-1);
					fmt++;
				}
				/*
				 * If digits follow a '*' without a '$', no
				 * argument is loaded; the digits are used
				 * instead.  fmt already points at the next
				 * unread character (possibly the terminating
				 * NUL), so it must not be advanced again.
				 */
			} else if (*(fmt + 1) == '$') {
				/* "*$": nothing to record, skip both */
				fmt += 2;
			} else {
				/*
				 * %* : width/precision consumes the current
				 * argument; the conversion moves to the next.
				 */
				int	ap = arg;

				prevarg = arg;
				lastarg = ++arg;
				if (fmt_record_slot(norm, sz, ap,
				    '*', &max) == -1)
					return (-1);
				fmt++;
			}
		}

		if ((*fmt == '.') || (*fmt == '*')) {
			if (dotseen)
				return (-1);
			dotseen = 1;
			fmt++;
			goto again;
		}

		if (!*fmt)
			goto end;

		/* Collect length modifiers */
		while (*fmt) {
			switch (*fmt) {
			case 'l':
				if (!(flag & OPT_ll)) {
					if (lflag) {
						flag &= ~OPT_l;
						flag |= OPT_ll;
					} else {
						flag |= OPT_l;
					}
				}
				lflag++;
				break;
			case 'L':
				flag |= OPT_L;
				break;
			case 'w':
				flag |= OPT_w;
				break;
			case 'h':
				if (flag & (OPT_h|OPT_hh))
					flag |= OPT_hh;
				else
					flag |= OPT_h;
				break;
			case 'j':
				flag |= OPT_j;
				break;
			case 'z':
			case 't':
				if (!(flag & OPT_ll)) {
					flag |= OPT_l;
				}
				break;
			case '\'':
			case '+':
			case '-':
			case ' ':
			case '#':
			case '.':
			case '*':
				/* flags/width after a modifier: rescan */
				goto again;
			default:
				if (isdigit(*fmt))
					goto again;
				else
					goto done;
			}
			fmt++;
		}
done:
		if (!*fmt)
			goto end;

		switch (*fmt) {
		case 'C':
			flag |= OPT_l;
			conv = 'c';
			break;
		case 'S':
			flag |= OPT_l;
			conv = 's';
			break;
		case 'd': case 's': case 'i': case 'o': case 'u':
		case 'c': case 'x': case 'X': case 'e': case 'E':
		case 'f': case 'F': case 'a': case 'A': case 'g':
		case 'G': case 'p': case 'n':
			conv = (char)*fmt;
			break;
		default:
			/* Not a conversion: give the argument back */
			lastarg = prevarg;
			continue;
		}

		if (fmt_record_slot(norm, sz, arg, conv, &max) == -1)
			return (-1);
		norm[(size_t)arg * 2 + 1] = flag;
	}

end:
	/* Every argument position below the highest one must be used */
	for (arg = 0; arg < max; arg++) {
		if (norm[arg * 2] == '\0')
			return (-1);
	}

	return (max);
}
274 
275 
276 void
check_format(struct entry * id,struct entry * str,int is_c_format)277 check_format(struct entry *id, struct entry *str, int is_c_format)
278 {
279 	int	i, n;
280 	int	id_b_newline, id_e_newline;
281 	int	plural_b_newline, plural_e_newline;
282 	int	str_b_newline, str_e_newline;
283 	int	id_fmt, plural_fmt, str_fmt;
284 	int	*pstr_fmt;
285 	char	*msgid, *plural, *msgstr;
286 	char	*id_norm, *plural_norm, *str_norm;
287 	char	**pstr_norm;
288 	size_t	id_len, id_num;
289 	size_t	plural_off, plural_len, plural_num;
290 	size_t	str_len, str_num;
291 	size_t	osz, nsz;
292 	struct loc	*p;
293 
294 	if (id->len == 1) {
295 		/*
296 		 * null string: header entry
297 		 * no check is performed
298 		 */
299 		return;
300 	}
301 
302 	msgid = id->str;
303 	id_num = id->num;
304 	msgstr = str->str;
305 	if (id->no > 1) {
306 		/* plural */
307 		id_len = id->pos[0].len;
308 		plural_off = id->pos[1].off;
309 		plural_len = id->pos[1].len;
310 		plural_num = id->pos[1].num;
311 		plural = msgid + plural_off;
312 	} else {
313 		/* no plural form */
314 		id_len = id->len;
315 		str_len = str->len;
316 		str_num = str->num;
317 		plural = NULL;
318 	}
319 
320 	/*
321 	 * First checking the newline
322 	 */
323 
324 	if (!plural) {
325 		/* no plural form */
326 		id_b_newline = (msgid[0] == '\n');
327 		id_e_newline = (msgid[id_len - 1 - 1] == '\n');
328 
329 		str_b_newline = (msgstr[0] == '\n');
330 		str_e_newline = (msgstr[str_len - 1 - 1] == '\n');
331 		if (id_b_newline && !str_b_newline) {
332 			diag(gettext(ERR_BEGIN_NEWLINE_1),
333 				id_num, str_num, cur_po);
334 			po_error++;
335 		} else if (!id_b_newline && str_b_newline) {
336 			diag(gettext(ERR_BEGIN_NEWLINE_2),
337 				id_num, str_num, cur_po);
338 			po_error++;
339 		}
340 		if (id_e_newline && !str_e_newline) {
341 			diag(gettext(ERR_END_NEWLINE_1),
342 				id_num, str_num, cur_po);
343 			po_error++;
344 		} else if (!id_e_newline && str_e_newline) {
345 			diag(gettext(ERR_END_NEWLINE_2),
346 				id_num, str_num, cur_po);
347 			po_error++;
348 		}
349 	} else {
350 		/* plural form */
351 		id_b_newline = (msgid[0] == '\n');
352 		id_e_newline = (msgid[id_len - 1 - 1] == '\n');
353 
354 		plural_b_newline = (plural[0] == '\n');
355 		plural_e_newline = (plural[plural_len - 1 -1 ] == '\n');
356 
357 		/* between msgid and msgid_plural */
358 		if (id_b_newline && !plural_b_newline) {
359 			diag(gettext(ERR_BEGIN_NEWLINE_3),
360 				id_num, plural_num, cur_po);
361 			po_error++;
362 		} else if (!id_b_newline && plural_b_newline) {
363 			diag(gettext(ERR_BEGIN_NEWLINE_4),
364 				id_num, plural_num, cur_po);
365 			po_error++;
366 		}
367 		if (id_e_newline && !plural_e_newline) {
368 			diag(gettext(ERR_END_NEWLINE_3),
369 				id_num, plural_num, cur_po);
370 			po_error++;
371 		} else if (!id_e_newline && plural_e_newline) {
372 			diag(gettext(ERR_END_NEWLINE_4),
373 				id_num, plural_num, cur_po);
374 			po_error++;
375 		}
376 
377 		for (i = 0; i < str->no; i++) {
378 			p = str->pos + i;
379 			str_b_newline = (msgstr[p->off] == '\n');
380 			str_e_newline =
381 				(msgstr[p->off + p->len - 1 - 1] == '\n');
382 
383 			if (id_b_newline && !str_b_newline) {
384 				diag(gettext(ERR_BEGIN_NEWLINE_5),
385 					id_num, p->num, cur_po, i);
386 				po_error++;
387 			} else if (!id_b_newline && str_b_newline) {
388 				diag(gettext(ERR_BEGIN_NEWLINE_6),
389 					id_num, p->num, cur_po, i);
390 				po_error++;
391 			}
392 
393 			if (id_e_newline && !str_e_newline) {
394 				diag(gettext(ERR_END_NEWLINE_5),
395 					id_num, p->num, cur_po, i);
396 				po_error++;
397 			} else if (!id_e_newline && str_e_newline) {
398 				diag(gettext(ERR_END_NEWLINE_6),
399 					id_num, p->num, cur_po, i);
400 				po_error++;
401 			}
402 		}
403 	}
404 
405 	/*
406 	 * if c-format is not specified, no printf-format check
407 	 * is performed.
408 	 */
409 	if (!is_c_format) {
410 		return;
411 	}
412 
413 	osz = id_len * 2;
414 	id_norm = (char *)Xcalloc(1, osz);
415 	id_fmt = extract_format(id_norm, msgid, osz);
416 	if (id_fmt == -1) {
417 		diag(gettext(ERR_INVALID_FMT), id_num, cur_po);
418 		po_error++;
419 	}
420 
421 	if (!plural) {
422 		/* no plural */
423 
424 		nsz = str_len * 2;
425 		str_norm = (char *)Xcalloc(1, nsz);
426 		str_fmt = extract_format(str_norm, msgstr, nsz);
427 		if (str_fmt == -1) {
428 			diag(gettext(ERR_INVALID_FMT), str_num, cur_po);
429 			po_error++;
430 		}
431 
432 		if (id_fmt != str_fmt) {
433 			diag(gettext(ERR_INCMP_FMT),
434 				id_num, str_num, cur_po);
435 			diag(gettext(ERR_INCMP_FMT_DIFF_1),
436 				id_fmt, str_fmt);
437 			po_error++;
438 		} else {
439 			for (n = 0; n < id_fmt; n++) {
440 				if ((id_norm[n * 2] !=
441 					str_norm[n * 2]) ||
442 					(id_norm[n * 2 + 1] !=
443 					str_norm[n * 2 + 1])) {
444 					diag(gettext(ERR_INCMP_FMT),
445 						id_num, str_num, cur_po);
446 					diag(gettext(ERR_INCMP_FMT_DIFF_2),
447 						n + 1);
448 					po_error++;
449 				}
450 			}
451 		}
452 		free(str_norm);
453 		free(id_norm);
454 
455 		return;
456 	}
457 
458 	/* plural */
459 	nsz = plural_len * 2;
460 	plural_norm = (char *)Xcalloc(1, nsz);
461 	plural_fmt = extract_format(plural_norm, plural, nsz);
462 	if (plural_fmt == -1) {
463 		diag(gettext(ERR_INVALID_FMT), plural_num, cur_po);
464 		po_error++;
465 	}
466 
467 	pstr_norm = (char **)Xcalloc(str->no, sizeof (char *));
468 	pstr_fmt = (int *)Xcalloc(str->no, sizeof (int));
469 	for (i = 0; i < str->no; i++) {
470 		p = str->pos + i;
471 		nsz = p->len * 2;
472 		pstr_norm[i] = (char *)Xcalloc(1, nsz);
473 		pstr_fmt[i] = extract_format(pstr_norm[i],
474 			msgstr + p->off, nsz);
475 		if (pstr_fmt[i] == -1) {
476 			diag(gettext(ERR_INVALID_FMT),
477 				p->num, cur_po);
478 			po_error++;
479 		}
480 	}
481 
482 	/* between msgid and msgid_plural */
483 	if (id_fmt != plural_fmt) {
484 		diag(gettext(ERR_INCMP_FMT),
485 			id_num, plural_num, cur_po);
486 		diag(gettext(ERR_INCMP_FMT_DIFF_1),
487 			id_fmt, plural_fmt);
488 		po_error++;
489 	} else {
490 		for (n = 0; n < id_fmt; n++) {
491 			if ((id_norm[n * 2] !=
492 				plural_norm[n * 2]) ||
493 				(id_norm[n * 2 + 1] !=
494 				plural_norm[n * 2 + 1])) {
495 				diag(gettext(ERR_INCMP_FMT),
496 					id_num, plural_num, cur_po);
497 				diag(gettext(ERR_INCMP_FMT_DIFF_2),
498 					n + 1);
499 				po_error++;
500 			}
501 		}
502 	}
503 	free(plural_norm);
504 
505 	/* between msgid and msgstr */
506 	for (i = 0; i < str->no; i++) {
507 		p = str->pos + i;
508 		if (id_fmt != pstr_fmt[i]) {
509 			diag(gettext(ERR_INCMP_FMT),
510 				id_num, p->num, cur_po);
511 			diag(gettext(ERR_INCMP_FMT_DIFF_1),
512 				id_fmt, pstr_fmt[i]);
513 			po_error++;
514 		} else {
515 			for (n = 0; n < id_fmt; n++) {
516 				if ((id_norm[n * 2] !=
517 					pstr_norm[i][n * 2]) ||
518 					(id_norm[n * 2 + 1] !=
519 					pstr_norm[i][n * 2 + 1])) {
520 					diag(gettext(ERR_INCMP_FMT),
521 						id_num, p->num, cur_po);
522 					diag(gettext(ERR_INCMP_FMT_DIFF_2),
523 						n + 1);
524 					po_error++;
525 				}
526 			}
527 		}
528 		free(pstr_norm[i]);
529 	}
530 	free(pstr_norm);
531 	free(pstr_fmt);
532 	free(id_norm);
533 }
534