xref: /illumos-gate/usr/src/cmd/msgfmt/msgfmt.c (revision df69b3162f4007cd457c38c170602faca56dbf73)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
#include <limits.h>
#include "sun_msgfmt.h"
28 
29 static void	read_psffm(char *);
30 static void	sortit(char *, char *);
31 static wchar_t	*consume_whitespace(wchar_t *);
32 static char	expand_meta(wchar_t **);
33 static struct domain_struct	*find_domain_node(char *);
34 static void	insert_message(struct domain_struct *, char *, char *);
35 static void	output_all_mo_files(void);
36 static void	output_one_mo_file(struct domain_struct *);
37 static size_t _mbsntowcs(wchar_t **, char **, size_t *);
38 
39 #ifdef DEBUG
40 static void	printlist(void);
41 #endif
42 
43 static char	gcurrent_domain[TEXTDOMAINMAX+1];
44 static char	*gmsgid;		/* Stores msgid when read po file */
45 static char	*gmsgstr;		/* Stores msgstr when read po file */
46 static int	gmsgid_size;		/* The current size of msgid buffer */
47 static int	gmsgstr_size;		/* The current size of msgstr buffer */
48 static char	*outfile = NULL;
49 static int	linenum;		/* The line number in the file */
50 static int	msgid_linenum;		/* The last msgid token line number */
51 static int	msgstr_linenum;		/* The last msgstr token line number */
52 
53 static int	oflag = 0;
54 static int	sun_p = 0;
55 int	verbose = 0;
56 
57 static struct domain_struct	*first_domain = NULL;
58 static struct domain_struct	*last_used_domain = NULL;
59 
60 static int	mbcurmax;
61 
62 static char	**oargv;
63 static char	*inputdir;
64 
65 extern void	check_gnu(char *, size_t);
66 
67 #define	GNU_MSGFMT	"/usr/lib/gmsgfmt"
68 void
invoke_gnu_msgfmt(void)69 invoke_gnu_msgfmt(void)
70 {
71 	/*
72 	 * Transferring to /usr/lib/gmsgfmt
73 	 */
74 	char	*gnu_msgfmt;
75 #ifdef	DEBUG_MSGFMT
76 	gnu_msgfmt = getenv("GNU_MSGFMT");
77 	if (!gnu_msgfmt)
78 		gnu_msgfmt = GNU_MSGFMT;
79 #else
80 	gnu_msgfmt = GNU_MSGFMT;
81 #endif
82 
83 	if (verbose) {
84 		diag(gettext(DIAG_INVOKING_GNU));
85 	}
86 
87 	(void) execv(gnu_msgfmt, oargv);
88 	/* exec failed */
89 	error(gettext(ERR_EXEC_FAILED), gnu_msgfmt);
90 	/* NOTREACHED */
91 }
92 
93 static void
usage(void)94 usage(void)
95 {
96 	(void) fprintf(stderr, gettext(ERR_USAGE));
97 	exit(2);
98 }
99 
100 /*
101  * msgfmt - Generate binary tree for runtime gettext() using psffm: "Portable
102  * Source File Format for Messages" file template. This file may have
103  * previously been generated by the xgettext filter for c source files.
104  */
105 
106 int
main(int argc,char ** argv)107 main(int argc, char **argv)
108 {
109 	int	ret;
110 	static struct flags	flag;
111 
112 	(void) setlocale(LC_ALL, "");
113 #if	!defined(TEXT_DOMAIN)
114 #define	TEXT_DOMAIN	"SYS_TEST"
115 #endif
116 	(void) textdomain(TEXT_DOMAIN);
117 
118 	oargv = argv;
119 	ret = parse_option(&argc, &argv, &flag);
120 	if (ret == -1) {
121 		usage();
122 		/* NOTREACHED */
123 	}
124 
125 	if (flag.sun_p) {
126 		/* never invoke gnu msgfmt */
127 		if (flag.gnu_p) {
128 			error(gettext(ERR_GNU_ON_SUN));
129 			/* NOTREACHED */
130 		}
131 		sun_p = flag.sun_p;
132 	}
133 	if (flag.idir) {
134 		inputdir = flag.idir;
135 	}
136 	if (flag.ofile) {
137 		oflag = 1;
138 		outfile = flag.ofile;
139 	}
140 	if (flag.verbose) {
141 		verbose = 1;
142 	}
143 
144 	if (flag.gnu_p) {
145 		/* invoke /usr/lib/gmsgfmt */
146 		invoke_gnu_msgfmt();
147 		/* NOTREACHED */
148 	}
149 
150 	/*
151 	 * read all portable object files specified in command arguments.
152 	 * Allocate initial size for msgid and msgstr. If it needs more
153 	 * spaces, realloc later.
154 	 */
155 	gmsgid = (char *)Xmalloc(MAX_VALUE_LEN);
156 	gmsgstr = (char *)Xmalloc(MAX_VALUE_LEN);
157 
158 	gmsgid_size = gmsgstr_size = MAX_VALUE_LEN;
159 	(void) memset(gmsgid, 0, gmsgid_size);
160 	(void) memset(gmsgstr, 0, gmsgstr_size);
161 
162 	mbcurmax = MB_CUR_MAX;
163 
164 	while (argc-- > 0) {
165 		if (verbose) {
166 			diag(gettext(DIAG_START_PROC), *argv);
167 		}
168 		read_psffm(*argv++);
169 	}
170 
171 	output_all_mo_files();
172 
173 #ifdef DEBUG
174 	printlist();
175 #endif
176 
177 	return (0);
178 
179 } /* main */
180 
181 
182 
183 /*
184  * read_psffm - read in "psffm" format file, check syntax, printing error
185  * messages as needed, output binary tree to file <domain>
186  */
187 
188 static void
read_psffm(char * file)189 read_psffm(char *file)
190 {
191 	int	fd;
192 	static char	msgfile[MAXPATHLEN];
193 	wchar_t	*linebufptr, *p;
194 	char	*bufptr = 0;
195 	int	quotefound;	/* double quote was seen */
196 	int	inmsgid = 0;	/* indicates "msgid" was seen */
197 	int	inmsgstr = 0;	/* indicates "msgstr" was seen */
198 	int	indomain = 0;	/* indicates "domain" was seen */
199 	wchar_t	wc;
200 	char	mb;
201 	int	n;
202 	char	token_found;	/* Boolean value */
203 	unsigned int	bufptr_index = 0; /* current index of bufptr */
204 	char	*mbuf, *addr;
205 	size_t	fsize, ln_size, ll;
206 	wchar_t	*linebufhead = NULL;
207 	struct stat64	statbuf;
208 	char	*filename;
209 
210 	/*
211 	 * For each po file to be read,
212 	 * 1) set domain to default and
213 	 * 2) set linenumer to 0.
214 	 */
215 	(void) strcpy(gcurrent_domain, DEFAULT_DOMAIN);
216 	linenum = 0;
217 
218 	if (!inputdir) {
219 		filename = Xstrdup(file);
220 	} else {
221 		size_t	dirlen, filelen, len;
222 
223 		dirlen = strlen(inputdir);
224 		filelen = strlen(file);
225 		len = dirlen + 1 + filelen + 1;
226 		filename = (char *)Xmalloc(len);
227 		(void) memcpy(filename, inputdir, dirlen);
228 		*(filename + dirlen) = '/';
229 		(void) memcpy(filename + dirlen + 1, file, filelen);
230 		*(filename + dirlen + 1 + filelen) = '\0';
231 	}
232 
233 	fd = open(filename, O_RDONLY);
234 	if (fd == -1) {
235 		error(gettext(ERR_OPEN_FAILED), filename);
236 		/* NOTREACHED */
237 	}
238 	if (fstat64(fd, &statbuf) == -1) {
239 		error(gettext(ERR_STAT_FAILED), filename);
240 		/* NOTREACHED */
241 	}
242 	fsize = (size_t)statbuf.st_size;
243 	if (fsize == 0) {
244 		/*
245 		 * The size of the specified po file is 0.
246 		 * In Solaris 8 and earlier, msgfmt was silent
247 		 * for the null po file.  So, just returns
248 		 * without generating an error message.
249 		 */
250 		(void) close(fd);
251 		free(filename);
252 		return;
253 	}
254 	addr = mmap(NULL, fsize, PROT_READ, MAP_SHARED, fd, 0);
255 	if (addr == MAP_FAILED) {
256 		error(gettext(ERR_MMAP_FAILED), filename);
257 		/* NOTREACHED */
258 	}
259 	(void) close(fd);
260 
261 	if (!sun_p)
262 		check_gnu(addr, fsize);
263 
264 	mbuf = addr;
265 	for (;;) {
266 		if (linebufhead) {
267 			free(linebufhead);
268 			linebufhead = NULL;
269 		}
270 		ln_size = _mbsntowcs(&linebufhead, &mbuf, &fsize);
271 		if (ln_size == (size_t)-1) {
272 			error(gettext(ERR_READ_FAILED), filename);
273 			/* NOTREACHED */
274 		} else if (ln_size == 0) {
275 			break;	/* End of File. */
276 		}
277 		linenum++;
278 
279 		linebufptr = linebufhead;
280 		quotefound = 0;
281 
282 		switch (*linebufptr) {
283 			case L'#':	/* comment    */
284 			case L'\n':	/* empty line */
285 				continue;
286 			case L'\"': /* multiple lines of msgid and msgstr */
287 				quotefound = 1;
288 				break;
289 		}
290 
291 		/*
292 		 * Process MSGID Tokens.
293 		 */
294 		token_found = (wcsncmp(MSGID_TOKEN, linebufptr,
295 		    MSGID_LEN) == 0) ? 1 : 0;
296 
297 		if (token_found || (quotefound && inmsgid)) {
298 
299 			if (token_found) {
300 				if (!CK_NXT_CH(linebufptr, MSGID_LEN+1)) {
301 					diag(gettext(ERR_NOSPC), linenum);
302 					error(gettext(ERR_EXITING));
303 					/* NOTREACHED */
304 				}
305 			}
306 
307 			if (inmsgid && !quotefound) {
308 				warning(gettext(WARN_NO_MSGSTR), msgid_linenum);
309 				continue;
310 			}
311 			if (inmsgstr) {
312 				sortit(gmsgid, gmsgstr);
313 				(void) memset(gmsgid, 0, gmsgid_size);
314 				(void) memset(gmsgstr, 0, gmsgstr_size);
315 			}
316 
317 			if (inmsgid) {
318 				/* multiple lines of msgid */
319 				/* cancel the previous null termination */
320 				bufptr_index--;
321 			} else {
322 				/*
323 				 * The first line of msgid.
324 				 * Save linenum of msgid to be used when
325 				 * printing warning or error message.
326 				 */
327 				msgid_linenum = linenum;
328 				p = linebufptr;
329 				linebufptr = consume_whitespace(
330 				    linebufptr + MSGID_LEN);
331 				ln_size -= linebufptr - p;
332 				bufptr = gmsgid;
333 				bufptr_index = 0;
334 			}
335 
336 			inmsgid = 1;
337 			inmsgstr = 0;
338 			indomain = 0;
339 			goto load_buffer;
340 		}
341 
342 		/*
343 		 * Process MSGSTR Tokens.
344 		 */
345 		token_found = (wcsncmp(MSGSTR_TOKEN, linebufptr,
346 		    MSGSTR_LEN) == 0) ? 1 : 0;
347 		if (token_found || (quotefound && inmsgstr)) {
348 
349 			if (token_found) {
350 				if (!CK_NXT_CH(linebufptr, MSGSTR_LEN+1)) {
351 					diag(gettext(ERR_NOSPC), linenum);
352 					error(gettext(ERR_EXITING));
353 					/* NOTREACHED */
354 				}
355 			}
356 
357 
358 			if (inmsgstr && !quotefound) {
359 				warning(gettext(WARN_NO_MSGID), msgstr_linenum);
360 				continue;
361 			}
362 			if (inmsgstr) {
363 				/* multiple lines of msgstr */
364 				/* cancel the previous null termination */
365 				bufptr_index--;
366 			} else {
367 				/*
368 				 * The first line of msgstr.
369 				 * Save linenum of msgid to be used when
370 				 * printing warning or error message.
371 				 */
372 				msgstr_linenum = linenum;
373 				p = linebufptr;
374 				linebufptr = consume_whitespace(
375 				    linebufptr + MSGSTR_LEN);
376 				ln_size -= linebufptr - p;
377 				bufptr = gmsgstr;
378 				bufptr_index = 0;
379 			}
380 
381 			inmsgstr = 1;
382 			inmsgid = 0;
383 			indomain = 0;
384 			goto load_buffer;
385 		}
386 
387 		/*
388 		 * Process DOMAIN Tokens.
389 		 * Add message id and message string to sorted list
390 		 * if msgstr was processed last time.
391 		 */
392 		token_found = (wcsncmp(DOMAIN_TOKEN, linebufptr,
393 		    DOMAIN_LEN) == 0) ? 1 : 0;
394 		if ((token_found) || (quotefound && indomain)) {
395 			if (token_found) {
396 				if (!CK_NXT_CH(linebufptr, DOMAIN_LEN+1)) {
397 					diag(gettext(ERR_NOSPC), linenum);
398 					error(gettext(ERR_EXITING));
399 					/* NOTREACHED */
400 				}
401 			}
402 
403 
404 			/*
405 			 * process msgid and msgstr pair for previous domain
406 			 */
407 			if (inmsgstr) {
408 				sortit(gmsgid, gmsgstr);
409 			}
410 
411 			/* refresh msgid and msgstr buffer */
412 			if (inmsgstr || inmsgid) {
413 				(void) memset(gmsgid, 0, gmsgid_size);
414 				(void) memset(gmsgstr, 0, gmsgstr_size);
415 			}
416 
417 			if (indomain) {
418 				/* multiple lines of domain */
419 				/* cancel the previous null termination */
420 				bufptr_index--;
421 			} else {
422 				p = linebufptr;
423 				linebufptr = consume_whitespace(
424 				    linebufptr + DOMAIN_LEN);
425 				(void) memset(gcurrent_domain, 0,
426 				    sizeof (gcurrent_domain));
427 				ln_size -= linebufptr - p;
428 				bufptr = gcurrent_domain;
429 				bufptr_index = 0;
430 			}
431 
432 			indomain = 1;
433 			inmsgid = 0;
434 			inmsgstr = 0;
435 		} /* if */
436 
437 load_buffer:
438 		/*
439 		 * Now, fill up the buffer pointed by bufptr.
440 		 * At this point bufptr should point to one of
441 		 * msgid, msgptr, or current_domain.
442 		 * Otherwise, the entire line is ignored.
443 		 */
444 
445 		if (!bufptr) {
446 			warning(gettext(WARN_SYNTAX_ERR), linenum);
447 			continue;
448 		}
449 
450 		if (*linebufptr++ != L'\"') {
451 			warning(gettext(WARN_MISSING_QUOTE), linenum);
452 			--linebufptr;
453 		}
454 		quotefound = 0;
455 
456 		/*
457 		 * If there is not enough space in the buffer,
458 		 * increase buffer by ln_size by realloc.
459 		 */
460 		ll = ln_size * mbcurmax;
461 		if (bufptr == gmsgid) {
462 			if (gmsgid_size < (bufptr_index + ll)) {
463 				gmsgid = (char *)Xrealloc(gmsgid,
464 				    bufptr_index + ll);
465 				bufptr = gmsgid;
466 				gmsgid_size = bufptr_index + ll;
467 			}
468 		} else if (bufptr == gmsgstr) {
469 			if (gmsgstr_size < (bufptr_index + ll)) {
470 				gmsgstr = (char *)Xrealloc(gmsgstr,
471 				    bufptr_index + ll);
472 				bufptr = gmsgstr;
473 				gmsgstr_size = bufptr_index + ll;
474 			}
475 		}
476 
477 		while (wc = *linebufptr++) {
478 			switch (wc) {
479 			case L'\n':
480 				if (!quotefound) {
481 warning(gettext(WARN_MISSING_QUOTE_AT_EOL), linenum);
482 				}
483 				break;
484 
485 			case L'\"':
486 				quotefound = 1;
487 				break;
488 
489 			case L'\\':
490 				if ((mb = expand_meta(&linebufptr)) != '\0')
491 					bufptr[bufptr_index++] = mb;
492 				break;
493 
494 			default:
495 				if ((n = wctomb(&bufptr[bufptr_index], wc)) > 0)
496 					bufptr_index += n;
497 			} /* switch */
498 			if (quotefound) {
499 				/*
500 				 * Check if any remaining characters
501 				 * after closing quote.
502 				 */
503 				linebufptr = consume_whitespace(linebufptr);
504 				if (*linebufptr != L'\n') {
505 					warning(gettext(WARN_INVALID_STRING),
506 					    linenum);
507 				}
508 				break;
509 			}
510 		} /* while */
511 
512 		bufptr[bufptr_index++] = '\0';
513 
514 		(void) strcpy(msgfile, gcurrent_domain);
515 		(void) strcat(msgfile, ".mo");
516 	} /* for(;;) */
517 
518 	if (inmsgstr) {
519 		sortit(gmsgid, gmsgstr);
520 	}
521 
522 	if (linebufhead)
523 		free(linebufhead);
524 	if (munmap(addr, statbuf.st_size) == -1) {
525 		error(gettext(ERR_MUNMAP_FAILED), filename);
526 		/* NOTREACHED */
527 	}
528 
529 	free(filename);
530 	return;
531 
532 } /* read_psffm */
533 
534 
/*
 * Return a pointer to the first wide character in buf that is neither
 * a blank nor a tab.  The terminating L'\0' stops the scan as well.
 */
static wchar_t *
consume_whitespace(wchar_t *buf)
{
	wchar_t	*cur;

	for (cur = buf; *cur == L' ' || *cur == L'\t'; cur++)
		;
	return (cur);
} /* consume_whitespace */
556 
557 
/*
 * Translate the escape sequence whose backslash has already been
 * consumed.  *buf points at the character following the backslash.
 *
 * For a recognized single-letter escape, or for one to three octal
 * digits, *buf is advanced past the sequence and the resulting byte is
 * returned.  For anything else '\0' is returned and *buf is left
 * untouched.
 */
static char
expand_meta(wchar_t **buf)
{
	static const struct {
		wchar_t	letter;
		char	value;
	} escapes[] = {
		{ L'"',  '\"' },
		{ L'\\', '\\' },
		{ L'b',  '\b' },
		{ L'f',  '\f' },
		{ L'n',  '\n' },
		{ L'r',  '\r' },
		{ L't',  '\t' },
		{ L'v',  '\v' },
		{ L'a',  '\a' },
		{ L'\'', '\'' },
		{ L'?',  '\?' }
	};
	wchar_t		*cur = *buf;
	unsigned int	i;
	int		value;
	int		ndigits;

	/* single-character escapes */
	for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++) {
		if (*cur == escapes[i].letter) {
			*buf = cur + 1;
			return (escapes[i].value);
		}
	}

	/* not an octal digit either: leave the input alone */
	if (*cur < L'0' || *cur > L'7')
		return ('\0');

	/*
	 * \ddd where ddd is an octal number of one, two, or three digits.
	 */
	value = 0;
	for (ndigits = 0;
	    ndigits < 3 && *cur >= L'0' && *cur <= L'7'; ndigits++) {
		value = 8 * value + (int)(*cur - L'0');
		cur++;
	}
	*buf = cur;
	return ((char)value);
} /* expand_meta */
630 
631 /*
632  * Finds the head of the current domain linked list and
633  * call insert_message() to insert msgid and msgstr pair
634  * to the linked list.
635  */
636 static void
sortit(char * msgid,char * msgstr)637 sortit(char *msgid, char *msgstr)
638 {
639 	struct domain_struct	*dom;
640 
641 #ifdef DEBUG
642 	(void) fprintf(stderr,
643 	    "==> sortit(), domain=<%s> msgid=<%s> msgstr=<%s>\n",
644 	    gcurrent_domain, msgid, msgstr);
645 #endif
646 
647 	/*
648 	 * If "-o filename" is specified, then all "domain" directive
649 	 * are ignored and, all messages will be stored in domain
650 	 * whose name is filename.
651 	 */
652 	if (oflag) {
653 		dom = find_domain_node(outfile);
654 	} else {
655 		dom = find_domain_node(gcurrent_domain);
656 	}
657 
658 	insert_message(dom, msgid, msgstr);
659 }
660 
661 /*
662  * This routine inserts message in the current domain message list.
663  * It is inserted in ascending order.
664  */
665 static void
insert_message(struct domain_struct * dom,char * msgid,char * msgstr)666 insert_message(struct domain_struct *dom,
667     char *msgid, char *msgstr)
668 {
669 	struct msg_chain	*p1;
670 	struct msg_chain	*node, *prev_node;
671 	int			b;
672 
673 	/*
674 	 * Find the optimal starting search position.
675 	 * The starting search position is either the first node
676 	 * or the current_elem of domain.
677 	 * The current_elem is the pointer to the node which
678 	 * is most recently accessed in domain.
679 	 */
680 	if (dom->current_elem != NULL) {
681 		b = strcmp(msgid, dom->current_elem->msgid);
682 		if (b == 0) {
683 			if (verbose)
684 				warning(gettext(WARN_DUP_MSG),
685 				    msgid, msgid_linenum);
686 			return;
687 		} else if (b > 0) { /* to implement descending order */
688 			p1 = dom->first_elem;
689 		} else {
690 			p1 = dom->current_elem;
691 		}
692 	} else {
693 		p1 = dom->first_elem;
694 	}
695 
696 	/*
697 	 * search msgid insert position in the list
698 	 * Search starts from the node pointed by p1.
699 	 */
700 	prev_node = NULL;
701 	while (p1) {
702 		b = strcmp(msgid, p1->msgid);
703 		if (b == 0) {
704 			if (verbose)
705 				warning(gettext(WARN_DUP_MSG),
706 				    msgid, msgid_linenum);
707 			return;
708 		} else if (b < 0) {  /* to implement descending order */
709 			/* move to the next node */
710 			prev_node = p1;
711 			p1 = p1->next;
712 		} else {
713 			/* insert a new msg node */
714 			node = (struct msg_chain *)
715 			    Xmalloc(sizeof (struct msg_chain));
716 			node->next = p1;
717 			node->msgid  = Xstrdup(msgid);
718 			node->msgstr = Xstrdup(msgstr);
719 
720 			if (prev_node) {
721 				prev_node->next = node;
722 			} else {
723 				dom->first_elem = node;
724 			}
725 			dom->current_elem = node;
726 			return;
727 		}
728 	} /* while */
729 
730 	/*
731 	 * msgid is smaller than any of msgid in the list or
732 	 * list is empty.
733 	 * Therefore, append it.
734 	 */
735 	node = (struct msg_chain *)
736 	    Xmalloc(sizeof (struct msg_chain));
737 	node->next = NULL;
738 	node->msgid  = Xstrdup(msgid);
739 	node->msgstr = Xstrdup(msgstr);
740 
741 	if (prev_node) {
742 		prev_node->next = node;
743 	} else {
744 		dom->first_elem = node;
745 	}
746 	dom->current_elem = node;
747 
748 	return;
749 
750 } /* insert_message */
751 
752 
753 /*
754  * This routine will find head of the linked list for the given
755  * domain_name. This looks up cache entry first and if cache misses,
756  * scans the list.
757  * If not found, then create a new node.
758  */
759 static struct domain_struct *
find_domain_node(char * domain_name)760 find_domain_node(char *domain_name)
761 {
762 	struct domain_struct	*p1;
763 	struct domain_struct	*node;
764 	struct domain_struct	*prev_node;
765 	int			b;
766 
767 
768 	/* for perfomance, check cache 'last_used_domain' */
769 	if (last_used_domain) {
770 		b = strcmp(domain_name, last_used_domain->domain);
771 		if (b == 0) {
772 			return (last_used_domain);
773 		} else if (b < 0) {
774 			p1 = first_domain;
775 		} else {
776 			p1 = last_used_domain;
777 		}
778 	} else {
779 		p1 = first_domain;
780 	}
781 
782 	prev_node = NULL;
783 	while (p1) {
784 		b = strcmp(domain_name, p1->domain);
785 		if (b == 0) {
786 			/* node found */
787 			last_used_domain = p1;
788 			return (p1);
789 		} else if (b > 0) {
790 			/* move to the next node */
791 			prev_node = p1;
792 			p1 = p1->next;
793 		} else {
794 			/* insert a new domain node */
795 			node = (struct domain_struct *)
796 			    Xmalloc(sizeof (struct domain_struct));
797 			node->next = p1;
798 			node->domain = Xstrdup(domain_name);
799 			node->first_elem = NULL;
800 			node->current_elem = NULL;
801 			if (prev_node) {
802 				/* insert the node in the middle */
803 				prev_node->next = node;
804 			} else {
805 				/* node inserted is the smallest */
806 				first_domain = node;
807 			}
808 			last_used_domain = node;
809 			return (node);
810 		}
811 	} /* while */
812 
813 	/*
814 	 * domain_name is larger than any of domain name in the list or
815 	 * list is empty.
816 	 */
817 	node = (struct domain_struct *)
818 	    Xmalloc(sizeof (struct domain_struct));
819 	node->next = NULL;
820 	node->domain = Xstrdup(domain_name);
821 	node->first_elem = NULL;
822 	node->current_elem = NULL;
823 	if (prev_node) {
824 		/* domain list is not empty */
825 		prev_node->next = node;
826 	} else {
827 		/* domain list is empty */
828 		first_domain = node;
829 	}
830 	last_used_domain = node;
831 
832 	return (node);
833 
834 } /* find_domain_node */
835 
836 
837 /*
838  * binary_compute() is used for pre-computing a binary search.
839  */
840 static int
binary_compute(int i,int j,int * more,int * less)841 binary_compute(int i, int j, int *more, int *less)
842 {
843 	int	k;
844 
845 	if (i > j) {
846 		return (LEAFINDICATOR);
847 	}
848 	k = (i + j) / 2;
849 
850 	less[k] = binary_compute(i, k - 1, more, less);
851 	more[k] = binary_compute(k + 1, j, more, less);
852 
853 	return (k);
854 
855 } /* binary_compute */
856 
857 
858 /*
859  * Write all domain data to file.
860  * Each domain will create one file.
861  */
862 static void
output_all_mo_files(void)863 output_all_mo_files(void)
864 {
865 	struct domain_struct	*p;
866 
867 	p = first_domain;
868 	while (p) {
869 		/*
870 		 * generate message object file only if there is
871 		 * at least one element.
872 		 */
873 		if (p->first_elem) {
874 			output_one_mo_file(p);
875 		}
876 		p = p->next;
877 	}
878 	return;
879 
880 } /* output_all_mo_files */
881 
882 
883 /*
884  * Write one domain data list to file.
885  */
886 static void
output_one_mo_file(struct domain_struct * dom)887 output_one_mo_file(struct domain_struct *dom)
888 {
889 	FILE	*fp;
890 	struct msg_chain	*p;
891 	int	message_count;
892 	int	string_count_msgid;
893 	int	string_count_msg;
894 	int	msgid_index = 0;
895 	int	msgstr_index = 0;
896 	int	*less, *more;
897 	int	i;
898 	char	fname [TEXTDOMAINMAX+1];
899 
900 	if (!dom || !dom->first_elem)
901 		return;
902 
903 	/*
904 	 * If -o flag is specified, then file name is used as domain name.
905 	 * If not, ".mo" is appended to the domain name.
906 	 */
907 	(void) strcpy(fname, dom->domain);
908 	if (!oflag) {
909 		(void) strcat(fname, ".mo");
910 	}
911 	fp = fopen(fname, "w");
912 	if (fp == NULL) {
913 		error(gettext(ERR_OPEN_FAILED), fname);
914 		/* NOTREACHED */
915 	}
916 
917 	/* compute offsets and counts */
918 	message_count = 0;
919 	p = dom->first_elem;
920 	while (p) {
921 		p->msgid_offset = msgid_index;
922 		p->msgstr_offset = msgstr_index;
923 		msgid_index += strlen(p->msgid) + 1;
924 		msgstr_index += strlen(p->msgstr) + 1;
925 		message_count++;
926 		p = p->next;
927 	}
928 
929 	/*
930 	 * Fill up less and more entries to be used for binary search.
931 	 */
932 	string_count_msgid = msgid_index;
933 	string_count_msg = msgstr_index;
934 	less = (int *)Xcalloc(message_count, sizeof (int));
935 	more = (int *)Xcalloc(message_count, sizeof (int));
936 
937 	(void) binary_compute(0, message_count - 1, more, less);
938 
939 #ifdef DEBUG
940 	{
941 		int i;
942 		for (i = 0; i < message_count; i++) {
943 			(void) fprintf(stderr,
944 			    "  less[%2d]=%2d, more[%2d]=%2d\n",
945 			    i, less[i], i, more[i]);
946 		}
947 	}
948 #endif
949 
950 	/*
951 	 * write out the message object file.
952 	 * The middle one is the first message to check by gettext().
953 	 */
954 	i = (message_count - 1) / 2;
955 	(void) fwrite(&i, sizeof (int), 1, fp);
956 	(void) fwrite(&message_count, sizeof (int), 1, fp);
957 	(void) fwrite(&string_count_msgid, sizeof (int), 1, fp);
958 	(void) fwrite(&string_count_msg, sizeof (int), 1, fp);
959 	i = MSG_STRUCT_SIZE * message_count;
960 	(void) fwrite(&i, sizeof (int), 1, fp);
961 
962 	/* march through linked list and write out all nodes. */
963 	i = 0;
964 	p = dom->first_elem;
965 	while (p) {	/* put out message struct */
966 		(void) fwrite(&less[i], sizeof (int), 1, fp);
967 		(void) fwrite(&more[i], sizeof (int), 1, fp);
968 		(void) fwrite(&p->msgid_offset, sizeof (int), 1, fp);
969 		(void) fwrite(&p->msgstr_offset, sizeof (int), 1, fp);
970 		i++;
971 		p = p->next;
972 	}
973 
974 	/* put out message id strings */
975 	p = dom->first_elem;
976 	while (p) {
977 		(void) fwrite(p->msgid, strlen(p->msgid)+1, 1, fp);
978 		p = p->next;
979 	}
980 
981 	/* put out message strings */
982 	p = dom->first_elem;
983 	while (p) {
984 		(void) fwrite(p->msgstr, strlen(p->msgstr)+1, 1, fp);
985 		p = p->next;
986 	}
987 
988 	(void) fclose(fp);
989 	free(less);
990 	free(more);
991 
992 	return;
993 
994 } /* output_one_mo_file */
995 
996 
997 /*
998  * read one line from *mbuf,
999  * skip preceding whitespaces,
1000  * convert the line to wide characters,
1001  * place the wide characters into *bufhead, and
1002  * return the number of wide characters placed.
1003  *
1004  * INPUT:
1005  *		**bufhead - address of a variable that is the pointer
1006  *			to wchar_t.
1007  *			The variable should been initialized to NULL.
1008  *		**mbuf - address of a variable that is the pointer
1009  *			to char.
1010  *			The pointer should point to the memory mmapped to
1011  *			the file to input.
1012  *		**fsize - address of a size_t variable that contains
1013  *			the size of unread bytes in the file to input.
1014  * OUTPUT:
1015  *		return - the number of wide characters placed.
1016  *		**bufhead - _mbsntowcs allocates the buffer to store
1017  *			one line in wchar_t from *mbuf and sets the address
1018  *			to *bufhead.
1019  *		**mbuf - _mbsntowcs reads one line from *mbuf and sets *mbuf
1020  *			to the beginning of the next line.
1021  *		**fsize - *fsize will be set to the size of the unread
1022  *			bytes in the file.
1023  */
1024 static size_t
_mbsntowcs(wchar_t ** bufhead,char ** mbuf,size_t * fsize)1025 _mbsntowcs(wchar_t **bufhead, char **mbuf, size_t *fsize)
1026 {
1027 	wchar_t	*tp, *th;
1028 	wchar_t	wc;
1029 	size_t	tbufsize = LINE_SIZE;
1030 	size_t	ttbufsize, nc;
1031 	char	*pc = *mbuf;
1032 	int	nb;
1033 
1034 	if (*fsize == 0) {
1035 		/* eof */
1036 		return (0);
1037 	}
1038 
1039 	th = (wchar_t *)Xmalloc(sizeof (wchar_t) * tbufsize);
1040 	nc = tbufsize;
1041 
1042 	/* skip preceding whitespaces */
1043 	while ((*pc != '\0')) {
1044 		if ((*pc == ' ') || (*pc == '\t')) {
1045 			pc++;
1046 			(*fsize)--;
1047 		} else {
1048 			break;
1049 		}
1050 	}
1051 
1052 	tp = th;
1053 	while (*fsize > 0) {
1054 		nb = mbtowc(&wc, pc, mbcurmax);
1055 		if (nb == -1) {
1056 			return ((size_t)-1);
1057 		}
1058 
1059 		if (*pc == '\n') {
1060 			/* found eol */
1061 			if (nc <= 1) {
1062 				/*
1063 				 * not enough buffer
1064 				 * at least 2 more bytes are required for
1065 				 * L'\n' and L'\0'
1066 				 */
1067 				ttbufsize = tbufsize + 2;
1068 				th = (wchar_t *)Xrealloc(th,
1069 				    sizeof (wchar_t) * ttbufsize);
1070 				tp = th + tbufsize - nc;
1071 				tbufsize = ttbufsize;
1072 			}
1073 			*tp++ = L'\n';
1074 			*tp++ = L'\0';
1075 			pc += nb;
1076 			*fsize -= nb;
1077 			*mbuf = pc;
1078 			*bufhead = th;
1079 			return ((size_t)(tp - th));
1080 		}
1081 		if (nc == 0) {
1082 			ttbufsize = tbufsize + LINE_SIZE;
1083 			th = (wchar_t *)Xrealloc(th,
1084 			    sizeof (wchar_t) * ttbufsize);
1085 			tp = th + tbufsize;
1086 			nc = LINE_SIZE;
1087 			tbufsize = ttbufsize;
1088 		}
1089 		*tp++ = wc;
1090 		nc--;
1091 		pc += nb;
1092 		*fsize -= nb;
1093 	}	/* while */
1094 
1095 	/*
1096 	 * At this point, the input file has been consumed,
1097 	 * but there is no ending '\n'; we add it to
1098 	 * the output file.
1099 	 */
1100 	if (nc <= 1) {
1101 		/*
1102 		 * not enough buffer
1103 		 * at least 2 more bytes are required for
1104 		 * L'\n' and L'\0'
1105 		 */
1106 		ttbufsize = tbufsize + 2;
1107 		th = (wchar_t *)Xrealloc(th,
1108 		    sizeof (wchar_t) * ttbufsize);
1109 		tp = th + tbufsize - nc;
1110 		tbufsize = ttbufsize;
1111 	}
1112 	*tp++ = L'\n';
1113 	*tp++ = L'\0';
1114 	*mbuf = pc;
1115 	*bufhead = th;
1116 	return ((size_t)(tp - th));
1117 }
1118 
1119 
/*
 * This is a debug function; it is compiled only when DEBUG is defined.
 * It prints every domain and all of its msgid/msgstr pairs to stderr.
 */
#ifdef DEBUG
static void
printlist(void)
{
	struct domain_struct	*dom;
	struct msg_chain	*msg;

	(void) fprintf(stderr, "\n=== Printing contents of all domains ===\n");
	for (dom = first_domain; dom != NULL; dom = dom->next) {
		(void) fprintf(stderr, "domain name = <%s>\n", dom->domain);
		for (msg = dom->first_elem; msg != NULL; msg = msg->next) {
			(void) fprintf(stderr, "   msgid=<%s>, msgstr=<%s>\n",
			    msg->msgid, msg->msgstr);
		}
	}
} /* printlist */
#endif
1144