xref: /freebsd/usr.bin/gencat/gencat.c (revision 640235e2c2ba32947f7c59d168437ffa1280f1e6)
1 /* ex:ts=4
2  */
3 
4 /*	$NetBSD: gencat.c,v 1.18 2003/10/27 00:12:43 lukem Exp $	*/
5 
6 /*
7  * Copyright (c) 1996 The NetBSD Foundation, Inc.
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to The NetBSD Foundation
11  * by J.T. Conklin.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /***********************************************************
36 Copyright 1990, by Alfalfa Software Incorporated, Cambridge, Massachusetts.
37 
38                         All Rights Reserved
39 
40 Permission to use, copy, modify, and distribute this software and its
41 documentation for any purpose and without fee is hereby granted,
42 provided that the above copyright notice appear in all copies and that
43 both that copyright notice and this permission notice appear in
44 supporting documentation, and that Alfalfa's name not be used in
45 advertising or publicity pertaining to distribution of the software
46 without specific, written prior permission.
47 
48 ALPHALPHA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
49 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
50 ALPHALPHA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
51 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
52 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
53 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
54 SOFTWARE.
55 
56 If you make any modifications, bugfixes or other changes to this software
57 we'd appreciate it if you could send a copy to us so we can keep things
58 up-to-date.  Many thanks.
59 				Kee Hinckley
60 				Alfalfa Software, Inc.
61 				267 Allston St., #3
62 				Cambridge, MA 02139  USA
63 				nazgul@alfalfa.com
64 
65 ******************************************************************/
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #define _NLS_PRIVATE
71 
72 #include <sys/types.h>
73 #include <sys/queue.h>
74 
75 #include <arpa/inet.h>		/* for htonl() */
76 
77 #include <ctype.h>
78 #include <err.h>
79 #include <fcntl.h>
80 #include <limits.h>
81 #include <nl_types.h>
82 #include <stdio.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <unistd.h>
86 
/*
 * One message of a set: its number and its text, chained into the
 * message list of the set that owns it.
 */
struct _msgT {
	long    msgId;			/* message number */
	char   *str;			/* message text; MCAddMsg() stores an xstrdup() copy */
	LIST_ENTRY(_msgT) entries;	/* links inside the owning set's msghead list */
};
92 
/*
 * One message set: its number and the list of messages it contains,
 * chained into the file-wide list of sets.
 */
struct _setT {
	long    setId;				/* set number */
	LIST_HEAD(msghead, _msgT) msghead;	/* messages belonging to this set */
	LIST_ENTRY(_setT) entries;		/* links inside the global set list */
};
98 
/* Every set known so far; MCAddSet() inserts in ascending setId order. */
static LIST_HEAD(sethead, _setT) sethead;
/* Set selected by the most recent MCAddSet() call; NULL until then. */
static struct _setT *curSet = NULL;

/* Buffer holding the line most recently produced by get_line(). */
static char *curline = NULL;
/* Count of get_line() calls so far; printed by warning(). */
static long lineno = 0;
104 
/* File-local helpers. */
static	char   *cskip(char *cptr);
static	void	error(const char *msg);
static	char   *get_line(int fd);
static	char   *getmsg(int fd, char *cptr, char quote);
static	void	warning(const char *cptr, const char *msg);
static	char   *wskip(char *cptr);
static	char   *xstrdup(const char *str);
static	void   *xmalloc(size_t len);
static	void   *xrealloc(void *ptr, size_t size);

/*
 * Catalog operations with external linkage.
 * NOTE(review): no definition of MCReadCat() appears in the visible part
 * of this file - confirm whether the declaration is still needed.
 */
void	MCParse(int fd);
void	MCReadCat(int fd);
void	MCWriteCat(int fd);
void	MCDelMsg(int msgId);
void	MCAddMsg(int msgId, const char *str);
void	MCAddSet(int setId);
void	MCDelSet(int setId);
void	usage(void);
int	main(int argc, char **argv);
124 
/*
 * Print the one-line command synopsis on stderr and terminate the
 * program with exit status 1.
 */
void
usage(void)
{
	const char *progname = getprogname();

	fprintf(stderr, "usage: %s catfile msgfile ...\n", progname);
	exit(1);
}
131 
132 int
133 main(int argc, char **argv)
134 {
135 	int     ofd, ifd;
136 	char	*catfile = NULL;
137 	int     c;
138 
139 #define DEPRECATEDMSG	1
140 
141 #ifdef DEPRECATEDMSG
142 	while ((c = getopt(argc, argv, "new")) != -1) {
143 #else
144 	while ((c = getopt(argc, argv, "")) != -1) {
145 #endif
146 		switch (c) {
147 #ifdef DEPRECATEDMSG
148 		case 'n':
149 			fprintf(stderr, "WARNING: Usage of \"-new\" argument is deprecated.\n");
150 		case 'e':
151 		case 'w':
152 			break;
153 #endif
154 		case '?':
155 		default:
156 			usage();
157 			/* NOTREACHED */
158 		}
159 	}
160 	argc -= optind;
161 	argv += optind;
162 
163 	if (argc < 2) {
164 		usage();
165 		/* NOTREACHED */
166 	}
167 	catfile = *argv++;
168 
169 	for (; *argv; argv++) {
170 		if ((ifd = open(*argv, O_RDONLY)) < 0)
171 			err(1, "Unable to read %s", *argv);
172 		MCParse(ifd);
173 		close(ifd);
174 	}
175 
176 	if ((ofd = open(catfile, O_WRONLY | O_TRUNC | O_CREAT, 0666)) < 0)
177 		err(1, "Unable to create a new %s", catfile);
178 	MCWriteCat(ofd);
179 	exit(0);
180 }
181 
182 static void
183 warning(const char *cptr, const char *msg)
184 {
185 	fprintf(stderr, "%s: %s on line %ld\n", getprogname(), msg, lineno);
186 	fprintf(stderr, "%s\n", curline);
187 	if (cptr) {
188 		char   *tptr;
189 		for (tptr = curline; tptr < cptr; ++tptr)
190 			putc(' ', stderr);
191 		fprintf(stderr, "^\n");
192 	}
193 }
194 
/*
 * Fatal-error shorthands built on error().  Each expands to exactly one
 * statement, so "if (cond) NOMEM(); else ..." parses as written.
 */
#define	CORRUPT()	do { error("corrupt message catalog"); } while (0)
#define	NOMEM()		do { error("out of memory"); } while (0)
197 
/*
 * Report a fatal problem: print msg through warning() without a column
 * marker, then terminate the program with exit status 1.
 */
static void
error(const char *msg)
{
	const char *no_marker = NULL;

	warning(no_marker, msg);
	exit(1);
}
204 
/*
 * malloc() wrapper: returns the new allocation of len bytes, or reports
 * "out of memory" through NOMEM() when malloc() returns NULL.
 */
static void *
xmalloc(size_t len)
{
	void   *mem = malloc(len);

	if (mem == NULL)
		NOMEM();
	return (mem);
}
214 
/*
 * realloc() wrapper: returns the resized allocation, or reports
 * "out of memory" through NOMEM() when realloc() returns NULL.
 */
static void *
xrealloc(void *ptr, size_t size)
{
	void   *grown = realloc(ptr, size);

	if (grown == NULL)
		NOMEM();
	return (grown);
}
222 
/*
 * strdup() wrapper: returns a heap copy of str, or reports
 * "out of memory" through NOMEM() when strdup() returns NULL.
 */
static char *
xstrdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		NOMEM();
	return (copy);
}
232 
233 static char *
234 get_line(int fd)
235 {
236 	static long curlen = BUFSIZ;
237 	static char buf[BUFSIZ], *bptr = buf, *bend = buf;
238 	char   *cptr, *cend;
239 	long    buflen;
240 
241 	if (!curline) {
242 		curline = xmalloc(curlen);
243 	}
244 	++lineno;
245 
246 	cptr = curline;
247 	cend = curline + curlen;
248 	for (;;) {
249 		for (; bptr < bend && cptr < cend; ++cptr, ++bptr) {
250 			if (*bptr == '\n') {
251 				*cptr = '\0';
252 				++bptr;
253 				return (curline);
254 			} else
255 				*cptr = *bptr;
256 		}
257 		if (cptr == cend) {
258 			cptr = curline = xrealloc(curline, curlen *= 2);
259 			cend = curline + curlen;
260 		}
261 		if (bptr == bend) {
262 			buflen = read(fd, buf, BUFSIZ);
263 			if (buflen <= 0) {
264 				if (cptr > curline) {
265 					*cptr = '\0';
266 					return (curline);
267 				}
268 				return (NULL);
269 			}
270 			bend = buf + buflen;
271 			bptr = buf;
272 		}
273 	}
274 }
275 
/*
 * Skip a run of whitespace.
 *
 * Returns a pointer to the first non-whitespace character at or after
 * cptr.  When cptr does not point at whitespace (this includes the end
 * of the string) a warning is issued and cptr is returned unchanged.
 */
static char *
wskip(char *cptr)
{
	char   *scan = cptr;

	if (*scan == '\0' || !isspace((unsigned char) *scan)) {
		warning(scan, "expected a space");
		return (scan);
	}
	/* The first character is known to be whitespace; step past the run. */
	do {
		++scan;
	} while (*scan != '\0' && isspace((unsigned char) *scan));
	return (scan);
}
287 
/*
 * Skip a run of non-whitespace characters.
 *
 * Returns a pointer to the first whitespace character (or the
 * terminating NUL) at or after cptr.  When cptr already points at
 * whitespace or at the end of the string a warning is issued and cptr
 * is returned unchanged.
 */
static char *
cskip(char *cptr)
{
	char   *scan = cptr;

	if (*scan == '\0' || isspace((unsigned char) *scan)) {
		warning(scan, "wasn't expecting a space");
		return (scan);
	}
	/* The first character is known to be a word character. */
	do {
		++scan;
	} while (*scan != '\0' && !isspace((unsigned char) *scan));
	return (scan);
}
299 
300 static char *
301 getmsg(int fd, char *cptr, char quote)
302 {
303 	static char *msg = NULL;
304 	static long msglen = 0;
305 	long    clen, i;
306 	char   *tptr;
307 
308 	if (quote && *cptr == quote) {
309 		++cptr;
310 	}
311 
312 	clen = strlen(cptr) + 1;
313 	if (clen > msglen) {
314 		if (msglen)
315 			msg = xrealloc(msg, clen);
316 		else
317 			msg = xmalloc(clen);
318 		msglen = clen;
319 	}
320 	tptr = msg;
321 
322 	while (*cptr) {
323 		if (quote && *cptr == quote) {
324 			char   *tmp;
325 			tmp = cptr + 1;
326 			if (*tmp && (!isspace((unsigned char) *tmp) || *wskip(tmp))) {
327 				warning(cptr, "unexpected quote character, ignoring");
328 				*tptr++ = *cptr++;
329 			} else {
330 				*cptr = '\0';
331 			}
332 		} else
333 			if (*cptr == '\\') {
334 				++cptr;
335 				switch (*cptr) {
336 				case '\0':
337 					cptr = get_line(fd);
338 					if (!cptr)
339 						error("premature end of file");
340 					msglen += strlen(cptr);
341 					i = tptr - msg;
342 					msg = xrealloc(msg, msglen);
343 					tptr = msg + i;
344 					break;
345 
346 		#define	CASEOF(CS, CH)		\
347 			case CS:		\
348 				*tptr++ = CH;	\
349 				++cptr;		\
350 				break;		\
351 
352 				CASEOF('n', '\n');
353 				CASEOF('t', '\t');
354 				CASEOF('v', '\v');
355 				CASEOF('b', '\b');
356 				CASEOF('r', '\r');
357 				CASEOF('f', '\f');
358 				CASEOF('"', '"');
359 				CASEOF('\\', '\\');
360 
361 				default:
362 					if (quote && *cptr == quote) {
363 						*tptr++ = *cptr++;
364 					} else if (isdigit((unsigned char) *cptr)) {
365 						*tptr = 0;
366 						for (i = 0; i < 3; ++i) {
367 							if (!isdigit((unsigned char) *cptr))
368 								break;
369 							if (*cptr > '7')
370 								warning(cptr, "octal number greater than 7?!");
371 							*tptr *= 8;
372 							*tptr += (*cptr - '0');
373 							++cptr;
374 						}
375 					} else {
376 						warning(cptr, "unrecognized escape sequence");
377 					}
378 					break;
379 				}
380 			} else {
381 				*tptr++ = *cptr++;
382 			}
383 	}
384 	*tptr = '\0';
385 	return (msg);
386 }
387 
388 void
389 MCParse(int fd)
390 {
391 	char   *cptr, *str;
392 	int     setid, msgid = 0;
393 	char    quote = 0;
394 
395 	/* XXX: init sethead? */
396 
397 	while ((cptr = get_line(fd))) {
398 		if (*cptr == '$') {
399 			++cptr;
400 			if (strncmp(cptr, "set", 3) == 0) {
401 				cptr += 3;
402 				cptr = wskip(cptr);
403 				setid = atoi(cptr);
404 				MCAddSet(setid);
405 				msgid = 0;
406 			} else if (strncmp(cptr, "delset", 6) == 0) {
407 				cptr += 6;
408 				cptr = wskip(cptr);
409 				setid = atoi(cptr);
410 				MCDelSet(setid);
411 			} else if (strncmp(cptr, "quote", 5) == 0) {
412 				cptr += 5;
413 				if (!*cptr)
414 					quote = 0;
415 				else {
416 					cptr = wskip(cptr);
417 					if (!*cptr)
418 						quote = 0;
419 					else
420 						quote = *cptr;
421 				}
422 			} else if (isspace((unsigned char) *cptr)) {
423 				;
424 			} else {
425 				if (*cptr) {
426 					cptr = wskip(cptr);
427 					if (*cptr)
428 						warning(cptr, "unrecognized line");
429 				}
430 			}
431 		} else {
432 			/*
433 			 * First check for (and eat) empty lines....
434 			 */
435 			if (!*cptr)
436 				continue;
437 			/*
438 			 * We have a digit? Start of a message. Else,
439 			 * syntax error.
440 			 */
441 			if (isdigit((unsigned char) *cptr)) {
442 				msgid = atoi(cptr);
443 				cptr = cskip(cptr);
444 				cptr = wskip(cptr);
445 				/* if (*cptr) ++cptr; */
446 			} else {
447 				warning(cptr, "neither blank line nor start of a message id");
448 				continue;
449 			}
450 			/*
451 			 * If we have a message ID, but no message,
452 			 * then this means "delete this message id
453 			 * from the catalog".
454 			 */
455 			if (!*cptr) {
456 				MCDelMsg(msgid);
457 			} else {
458 				str = getmsg(fd, cptr, quote);
459 				MCAddMsg(msgid, str);
460 			}
461 		}
462 	}
463 }
464 
465 /*
466  * Write message catalog.
467  *
468  * The message catalog is first converted from its internal to its
469  * external representation in a chunk of memory allocated for this
470  * purpose.  Then the completed catalog is written.  This approach
471  * avoids additional housekeeping variables and/or a lot of seeks
472  * that would otherwise be required.
473  */
474 void
475 MCWriteCat(int fd)
476 {
477 	int     nsets;		/* number of sets */
478 	int     nmsgs;		/* number of msgs */
479 	int     string_size;	/* total size of string pool */
480 	int     msgcat_size;	/* total size of message catalog */
481 	void   *msgcat;		/* message catalog data */
482 	struct _nls_cat_hdr *cat_hdr;
483 	struct _nls_set_hdr *set_hdr;
484 	struct _nls_msg_hdr *msg_hdr;
485 	char   *strings;
486 	struct _setT *set;
487 	struct _msgT *msg;
488 	int     msg_index;
489 	int     msg_offset;
490 
491 	/* determine number of sets, number of messages, and size of the
492 	 * string pool */
493 	nsets = 0;
494 	nmsgs = 0;
495 	string_size = 0;
496 
497 	for (set = sethead.lh_first; set != NULL;
498 	    set = set->entries.le_next) {
499 		nsets++;
500 
501 		for (msg = set->msghead.lh_first; msg != NULL;
502 		    msg = msg->entries.le_next) {
503 			nmsgs++;
504 			string_size += strlen(msg->str) + 1;
505 		}
506 	}
507 
508 #ifdef DEBUG
509 	printf("number of sets: %d\n", nsets);
510 	printf("number of msgs: %d\n", nmsgs);
511 	printf("string pool size: %d\n", string_size);
512 #endif
513 
514 	/* determine size and then allocate buffer for constructing external
515 	 * message catalog representation */
516 	msgcat_size = sizeof(struct _nls_cat_hdr)
517 	    + (nsets * sizeof(struct _nls_set_hdr))
518 	    + (nmsgs * sizeof(struct _nls_msg_hdr))
519 	    + string_size;
520 
521 	msgcat = xmalloc(msgcat_size);
522 	memset(msgcat, '\0', msgcat_size);
523 
524 	/* fill in msg catalog header */
525 	cat_hdr = (struct _nls_cat_hdr *) msgcat;
526 	cat_hdr->__magic = htonl(_NLS_MAGIC);
527 	cat_hdr->__nsets = htonl(nsets);
528 	cat_hdr->__mem = htonl(msgcat_size - sizeof(struct _nls_cat_hdr));
529 	cat_hdr->__msg_hdr_offset =
530 	    htonl(nsets * sizeof(struct _nls_set_hdr));
531 	cat_hdr->__msg_txt_offset =
532 	    htonl(nsets * sizeof(struct _nls_set_hdr) +
533 	    nmsgs * sizeof(struct _nls_msg_hdr));
534 
535 	/* compute offsets for set & msg header tables and string pool */
536 	set_hdr = (struct _nls_set_hdr *)(void *)((char *)msgcat +
537 	    sizeof(struct _nls_cat_hdr));
538 	msg_hdr = (struct _nls_msg_hdr *)(void *)((char *)msgcat +
539 	    sizeof(struct _nls_cat_hdr) +
540 	    nsets * sizeof(struct _nls_set_hdr));
541 	strings = (char *) msgcat +
542 	    sizeof(struct _nls_cat_hdr) +
543 	    nsets * sizeof(struct _nls_set_hdr) +
544 	    nmsgs * sizeof(struct _nls_msg_hdr);
545 
546 	msg_index = 0;
547 	msg_offset = 0;
548 	for (set = sethead.lh_first; set != NULL;
549 	    set = set->entries.le_next) {
550 
551 		nmsgs = 0;
552 		for (msg = set->msghead.lh_first; msg != NULL;
553 		    msg = msg->entries.le_next) {
554 			int     msg_len = strlen(msg->str) + 1;
555 
556 			msg_hdr->__msgno = htonl(msg->msgId);
557 			msg_hdr->__msglen = htonl(msg_len);
558 			msg_hdr->__offset = htonl(msg_offset);
559 
560 			memcpy(strings, msg->str, msg_len);
561 			strings += msg_len;
562 			msg_offset += msg_len;
563 
564 			nmsgs++;
565 			msg_hdr++;
566 		}
567 
568 		set_hdr->__setno = htonl(set->setId);
569 		set_hdr->__nmsgs = htonl(nmsgs);
570 		set_hdr->__index = htonl(msg_index);
571 		msg_index += nmsgs;
572 		set_hdr++;
573 	}
574 
575 	/* write out catalog.  XXX: should this be done in small chunks? */
576 	write(fd, msgcat, msgcat_size);
577 }
578 
579 void
580 MCAddSet(int setId)
581 {
582 	struct _setT *p, *q;
583 
584 	if (setId <= 0) {
585 		error("setId's must be greater than zero");
586 		/* NOTREACHED */
587 	}
588 	if (setId > NL_SETMAX) {
589 		error("setId exceeds limit");
590 		/* NOTREACHED */
591 	}
592 
593 	p = sethead.lh_first;
594 	q = NULL;
595 	for (; p != NULL && p->setId < setId; q = p, p = p->entries.le_next);
596 
597 	if (p && p->setId == setId) {
598 		;
599 	} else {
600 		p = xmalloc(sizeof(struct _setT));
601 		memset(p, '\0', sizeof(struct _setT));
602 		LIST_INIT(&p->msghead);
603 
604 		p->setId = setId;
605 
606 		if (q == NULL) {
607 			LIST_INSERT_HEAD(&sethead, p, entries);
608 		} else {
609 			LIST_INSERT_AFTER(q, p, entries);
610 		}
611 	}
612 
613 	curSet = p;
614 }
615 
616 void
617 MCAddMsg(int msgId, const char *str)
618 {
619 	struct _msgT *p, *q;
620 
621 	if (!curSet)
622 		error("can't specify a message when no set exists");
623 
624 	if (msgId <= 0) {
625 		error("msgId's must be greater than zero");
626 		/* NOTREACHED */
627 	}
628 	if (msgId > NL_MSGMAX) {
629 		error("msgID exceeds limit");
630 		/* NOTREACHED */
631 	}
632 
633 	p = curSet->msghead.lh_first;
634 	q = NULL;
635 	for (; p != NULL && p->msgId < msgId; q = p, p = p->entries.le_next);
636 
637 	if (p && p->msgId == msgId) {
638 		free(p->str);
639 	} else {
640 		p = xmalloc(sizeof(struct _msgT));
641 		memset(p, '\0', sizeof(struct _msgT));
642 
643 		if (q == NULL) {
644 			LIST_INSERT_HEAD(&curSet->msghead, p, entries);
645 		} else {
646 			LIST_INSERT_AFTER(q, p, entries);
647 		}
648 	}
649 
650 	p->msgId = msgId;
651 	p->str = xstrdup(str);
652 }
653 
654 void
655 MCDelSet(int setId)
656 {
657 	struct _setT *set;
658 	struct _msgT *msg;
659 
660 	set = sethead.lh_first;
661 	for (; set != NULL && set->setId < setId; set = set->entries.le_next);
662 
663 	if (set && set->setId == setId) {
664 
665 		msg = set->msghead.lh_first;
666 		while (msg) {
667 			free(msg->str);
668 			LIST_REMOVE(msg, entries);
669 		}
670 
671 		LIST_REMOVE(set, entries);
672 		return;
673 	}
674 	warning(NULL, "specified set doesn't exist");
675 }
676 
677 void
678 MCDelMsg(int msgId)
679 {
680 	struct _msgT *msg;
681 
682 	if (!curSet)
683 		error("you can't delete a message before defining the set");
684 
685 	msg = curSet->msghead.lh_first;
686 	for (; msg != NULL && msg->msgId < msgId; msg = msg->entries.le_next);
687 
688 	if (msg && msg->msgId == msgId) {
689 		free(msg->str);
690 		LIST_REMOVE(msg, entries);
691 		return;
692 	}
693 	warning(NULL, "specified msg doesn't exist");
694 }
695