xref: /illumos-gate/usr/src/cmd/msgfmt/msgfmt.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "sun_msgfmt.h"
30 
31 static void	read_psffm(char *);
32 static void	sortit(char *, char *);
33 static wchar_t	*consume_whitespace(wchar_t *);
34 static char	expand_meta(wchar_t **);
35 static struct domain_struct	*find_domain_node(char *);
36 static void	insert_message(struct domain_struct *, char *, char *);
37 static void	output_all_mo_files(void);
38 static void	output_one_mo_file(struct domain_struct *);
39 static size_t _mbsntowcs(wchar_t **, char **, size_t *);
40 
41 #ifdef DEBUG
42 static void	printlist(void);
43 #endif
44 
/* Name of the domain in effect while a po file is read (see read_psffm) */
static char	gcurrent_domain[TEXTDOMAINMAX+1];
static char	*gmsgid;		/* Stores msgid when read po file */
static char	*gmsgstr;		/* Stores msgstr when read po file */
static int	gmsgid_size;		/* The current size of msgid buffer */
static int	gmsgstr_size;		/* The current size of msgstr buffer */
static char	*outfile = NULL;	/* flag.ofile, when one was given */
static int	linenum;		/* The line number in the file */
static int	msgid_linenum;		/* The last msgid token line number */
static int	msgstr_linenum;		/* The last msgstr token line number */

static int	oflag = 0;		/* set to 1 when flag.ofile is set */
static int	sun_p = 0;		/* copy of flag.sun_p; skips check_gnu() */
int	verbose = 0;			/* set to 1 when flag.verbose is set */

/* Head of the list of domains, kept sorted by find_domain_node() */
static struct domain_struct	*first_domain = NULL;
/* Domain most recently returned by find_domain_node() (lookup cache) */
static struct domain_struct	*last_used_domain = NULL;

static int	mbcurmax;		/* MB_CUR_MAX, sampled once in main() */

static char	**oargv;		/* argv as received by main() */
static char	*inputdir;		/* flag.idir; prefix for input files */
66 
67 extern void	check_gnu(char *, size_t);
68 
69 #define	GNU_MSGFMT	"/usr/lib/gmsgfmt"
70 void
71 invoke_gnu_msgfmt(void)
72 {
73 	/*
74 	 * Transferring to /usr/lib/gmsgfmt
75 	 */
76 	char	*gnu_msgfmt;
77 #ifdef	DEBUG_MSGFMT
78 	gnu_msgfmt = getenv("GNU_MSGFMT");
79 	if (!gnu_msgfmt)
80 		gnu_msgfmt = GNU_MSGFMT;
81 #else
82 	gnu_msgfmt = GNU_MSGFMT;
83 #endif
84 
85 	if (verbose) {
86 		diag(gettext(DIAG_INVOKING_GNU));
87 	}
88 
89 	(void) execv(gnu_msgfmt, oargv);
90 	/* exec failed */
91 	error(gettext(ERR_EXEC_FAILED), gnu_msgfmt);
92 	/* NOTREACHED */
93 }
94 
95 static void
96 usage(void)
97 {
98 	(void) fprintf(stderr, gettext(ERR_USAGE));
99 	exit(2);
100 }
101 
102 /*
103  * msgfmt - Generate binary tree for runtime gettext() using psffm: "Portable
104  * Source File Format for Messages" file template. This file may have
105  * previously been generated by the xgettext filter for c source files.
106  */
107 
108 int
109 main(int argc, char **argv)
110 {
111 	int	ret;
112 	static struct flags	flag;
113 
114 	(void) setlocale(LC_ALL, "");
115 #if	!defined(TEXT_DOMAIN)
116 #define	TEXT_DOMAIN	"SYS_TEST"
117 #endif
118 	(void) textdomain(TEXT_DOMAIN);
119 
120 	oargv = argv;
121 	ret = parse_option(&argc, &argv, &flag);
122 	if (ret == -1) {
123 		usage();
124 		/* NOTREACHED */
125 	}
126 
127 	if (flag.sun_p) {
128 		/* never invoke gnu msgfmt */
129 		if (flag.gnu_p) {
130 			error(gettext(ERR_GNU_ON_SUN));
131 			/* NOTREACHED */
132 		}
133 		sun_p = flag.sun_p;
134 	}
135 	if (flag.idir) {
136 		inputdir = flag.idir;
137 	}
138 	if (flag.ofile) {
139 		oflag = 1;
140 		outfile = flag.ofile;
141 	}
142 	if (flag.verbose) {
143 		verbose = 1;
144 	}
145 
146 	if (flag.gnu_p) {
147 		/* invoke /usr/lib/gmsgfmt */
148 		invoke_gnu_msgfmt();
149 		/* NOTREACHED */
150 	}
151 
152 	/*
153 	 * read all portable object files specified in command arguments.
154 	 * Allocate initial size for msgid and msgstr. If it needs more
155 	 * spaces, realloc later.
156 	 */
157 	gmsgid = (char *)Xmalloc(MAX_VALUE_LEN);
158 	gmsgstr = (char *)Xmalloc(MAX_VALUE_LEN);
159 
160 	gmsgid_size = gmsgstr_size = MAX_VALUE_LEN;
161 	(void) memset(gmsgid, 0, gmsgid_size);
162 	(void) memset(gmsgstr, 0, gmsgstr_size);
163 
164 	mbcurmax = MB_CUR_MAX;
165 
166 	while (argc-- > 0) {
167 		if (verbose) {
168 			diag(gettext(DIAG_START_PROC), *argv);
169 		}
170 		read_psffm(*argv++);
171 	}
172 
173 	output_all_mo_files();
174 
175 #ifdef DEBUG
176 	printlist();
177 #endif
178 
179 	return (0);
180 
181 } /* main */
182 
183 
184 
185 /*
186  * read_psffm - read in "psffm" format file, check syntax, printing error
187  * messages as needed, output binary tree to file <domain>
188  */
189 
190 static void
191 read_psffm(char *file)
192 {
193 	int	fd;
194 	static char	msgfile[MAXPATHLEN];
195 	wchar_t	*linebufptr, *p;
196 	char	*bufptr = 0;
197 	int	quotefound;	/* double quote was seen */
198 	int	inmsgid = 0;	/* indicates "msgid" was seen */
199 	int	inmsgstr = 0;	/* indicates "msgstr" was seen */
200 	int	indomain = 0;	/* indicates "domain" was seen */
201 	wchar_t	wc;
202 	char	mb;
203 	int	n;
204 	char	token_found;	/* Boolean value */
205 	unsigned int	bufptr_index = 0; /* current index of bufptr */
206 	char	*mbuf, *addr;
207 	size_t	fsize, ln_size, ll;
208 	wchar_t	*linebufhead = NULL;
209 	struct stat64	statbuf;
210 	char	*filename;
211 
212 	/*
213 	 * For each po file to be read,
214 	 * 1) set domain to default and
215 	 * 2) set linenumer to 0.
216 	 */
217 	(void) strcpy(gcurrent_domain, DEFAULT_DOMAIN);
218 	linenum = 0;
219 
220 	if (!inputdir) {
221 		filename = Xstrdup(file);
222 	} else {
223 		size_t	dirlen, filelen, len;
224 
225 		dirlen = strlen(inputdir);
226 		filelen = strlen(file);
227 		len = dirlen + 1 + filelen + 1;
228 		filename = (char *)Xmalloc(len);
229 		(void) memcpy(filename, inputdir, dirlen);
230 		*(filename + dirlen) = '/';
231 		(void) memcpy(filename + dirlen + 1, file, filelen);
232 		*(filename + dirlen + 1 + filelen) = '\0';
233 	}
234 
235 	fd = open(filename, O_RDONLY);
236 	if (fd == -1) {
237 		error(gettext(ERR_OPEN_FAILED), filename);
238 		/* NOTREACHED */
239 	}
240 	if (fstat64(fd, &statbuf) == -1) {
241 		error(gettext(ERR_STAT_FAILED), filename);
242 		/* NOTREACHED */
243 	}
244 	fsize = (size_t)statbuf.st_size;
245 	if (fsize == 0) {
246 		/*
247 		 * The size of the specified po file is 0.
248 		 * In Solaris 8 and earlier, msgfmt was silent
249 		 * for the null po file.  So, just returns
250 		 * without generating an error message.
251 		 */
252 		(void) close(fd);
253 		free(filename);
254 		return;
255 	}
256 	addr = mmap(NULL, fsize, PROT_READ, MAP_SHARED, fd, 0);
257 	if (addr == MAP_FAILED) {
258 		error(gettext(ERR_MMAP_FAILED), filename);
259 		/* NOTREACHED */
260 	}
261 	(void) close(fd);
262 
263 	if (!sun_p)
264 		check_gnu(addr, fsize);
265 
266 	mbuf = addr;
267 	for (;;) {
268 		if (linebufhead) {
269 			free(linebufhead);
270 			linebufhead = NULL;
271 		}
272 		ln_size = _mbsntowcs(&linebufhead, &mbuf, &fsize);
273 		if (ln_size == (size_t)-1) {
274 			error(gettext(ERR_READ_FAILED), filename);
275 			/* NOTREACHED */
276 		} else if (ln_size == 0) {
277 			break;	/* End of File. */
278 		}
279 		linenum++;
280 
281 		linebufptr = linebufhead;
282 		quotefound = 0;
283 
284 		switch (*linebufptr) {
285 			case L'#':	/* comment    */
286 			case L'\n':	/* empty line */
287 				continue;
288 			case L'\"': /* multiple lines of msgid and msgstr */
289 				quotefound = 1;
290 				break;
291 		}
292 
293 		/*
294 		 * Process MSGID Tokens.
295 		 */
296 		token_found = (wcsncmp(MSGID_TOKEN, linebufptr,
297 				MSGID_LEN) == 0) ? 1 : 0;
298 
299 		if (token_found || (quotefound && inmsgid)) {
300 
301 			if (token_found) {
302 				if (!CK_NXT_CH(linebufptr, MSGID_LEN+1)) {
303 					diag(gettext(ERR_NOSPC), linenum);
304 					error(gettext(ERR_EXITING));
305 					/* NOTREACHED */
306 				}
307 			}
308 
309 			if (inmsgid && !quotefound) {
310 				warning(gettext(WARN_NO_MSGSTR), msgid_linenum);
311 				continue;
312 			}
313 			if (inmsgstr) {
314 				sortit(gmsgid, gmsgstr);
315 				(void) memset(gmsgid, 0, gmsgid_size);
316 				(void) memset(gmsgstr, 0, gmsgstr_size);
317 			}
318 
319 			if (inmsgid) {
320 				/* multiple lines of msgid */
321 				/* cancel the previous null termination */
322 				bufptr_index--;
323 			} else {
324 				/*
325 				 * The first line of msgid.
326 				 * Save linenum of msgid to be used when
327 				 * printing warning or error message.
328 				 */
329 				msgid_linenum = linenum;
330 				p = linebufptr;
331 				linebufptr = consume_whitespace(
332 					linebufptr + MSGID_LEN);
333 				ln_size -= linebufptr - p;
334 				bufptr = gmsgid;
335 				bufptr_index = 0;
336 			}
337 
338 			inmsgid = 1;
339 			inmsgstr = 0;
340 			indomain = 0;
341 			goto load_buffer;
342 		}
343 
344 		/*
345 		 * Process MSGSTR Tokens.
346 		 */
347 		token_found = (wcsncmp(MSGSTR_TOKEN, linebufptr,
348 			MSGSTR_LEN) == 0) ? 1 : 0;
349 		if (token_found || (quotefound && inmsgstr)) {
350 
351 			if (token_found) {
352 				if (!CK_NXT_CH(linebufptr, MSGSTR_LEN+1)) {
353 					diag(gettext(ERR_NOSPC), linenum);
354 					error(gettext(ERR_EXITING));
355 					/* NOTREACHED */
356 				}
357 			}
358 
359 
360 			if (inmsgstr && !quotefound) {
361 				warning(gettext(WARN_NO_MSGID), msgstr_linenum);
362 				continue;
363 			}
364 			if (inmsgstr) {
365 				/* multiple lines of msgstr */
366 				/* cancel the previous null termination */
367 				bufptr_index--;
368 			} else {
369 				/*
370 				 * The first line of msgstr.
371 				 * Save linenum of msgid to be used when
372 				 * printing warning or error message.
373 				 */
374 				msgstr_linenum = linenum;
375 				p = linebufptr;
376 				linebufptr = consume_whitespace(
377 					linebufptr + MSGSTR_LEN);
378 				ln_size -= linebufptr - p;
379 				bufptr = gmsgstr;
380 				bufptr_index = 0;
381 			}
382 
383 			inmsgstr = 1;
384 			inmsgid = 0;
385 			indomain = 0;
386 			goto load_buffer;
387 		}
388 
389 		/*
390 		 * Process DOMAIN Tokens.
391 		 * Add message id and message string to sorted list
392 		 * if msgstr was processed last time.
393 		 */
394 		token_found = (wcsncmp(DOMAIN_TOKEN, linebufptr,
395 			DOMAIN_LEN) == 0) ? 1 : 0;
396 		if ((token_found) || (quotefound && indomain)) {
397 			if (token_found) {
398 				if (!CK_NXT_CH(linebufptr, DOMAIN_LEN+1)) {
399 					diag(gettext(ERR_NOSPC), linenum);
400 					error(gettext(ERR_EXITING));
401 					/* NOTREACHED */
402 				}
403 			}
404 
405 
406 			/*
407 			 * process msgid and msgstr pair for previous domain
408 			 */
409 			if (inmsgstr) {
410 				sortit(gmsgid, gmsgstr);
411 			}
412 
413 			/* refresh msgid and msgstr buffer */
414 			if (inmsgstr || inmsgid) {
415 				(void) memset(gmsgid, 0, gmsgid_size);
416 				(void) memset(gmsgstr, 0, gmsgstr_size);
417 			}
418 
419 			if (indomain) {
420 				/* multiple lines of domain */
421 				/* cancel the previous null termination */
422 				bufptr_index--;
423 			} else {
424 				p = linebufptr;
425 				linebufptr = consume_whitespace(
426 					linebufptr + DOMAIN_LEN);
427 				(void) memset(gcurrent_domain, 0,
428 						sizeof (gcurrent_domain));
429 				ln_size -= linebufptr - p;
430 				bufptr = gcurrent_domain;
431 				bufptr_index = 0;
432 			}
433 
434 			indomain = 1;
435 			inmsgid = 0;
436 			inmsgstr = 0;
437 		} /* if */
438 
439 load_buffer:
440 		/*
441 		 * Now, fill up the buffer pointed by bufptr.
442 		 * At this point bufptr should point to one of
443 		 * msgid, msgptr, or current_domain.
444 		 * Otherwise, the entire line is ignored.
445 		 */
446 
447 		if (!bufptr) {
448 			warning(gettext(WARN_SYNTAX_ERR), linenum);
449 			continue;
450 		}
451 
452 		if (*linebufptr++ != L'\"') {
453 			warning(gettext(WARN_MISSING_QUOTE), linenum);
454 			--linebufptr;
455 		}
456 		quotefound = 0;
457 
458 		/*
459 		 * If there is not enough space in the buffer,
460 		 * increase buffer by ln_size by realloc.
461 		 */
462 		ll = ln_size * mbcurmax;
463 		if (bufptr == gmsgid) {
464 			if (gmsgid_size < (bufptr_index + ll)) {
465 				gmsgid = (char *)Xrealloc(gmsgid,
466 					bufptr_index + ll);
467 				bufptr = gmsgid;
468 				gmsgid_size = bufptr_index + ll;
469 			}
470 		} else if (bufptr == gmsgstr) {
471 			if (gmsgstr_size < (bufptr_index + ll)) {
472 				gmsgstr = (char *)Xrealloc(gmsgstr,
473 					bufptr_index + ll);
474 				bufptr = gmsgstr;
475 				gmsgstr_size = bufptr_index + ll;
476 			}
477 		}
478 
479 		while (wc = *linebufptr++) {
480 			switch (wc) {
481 			case L'\n':
482 				if (!quotefound) {
483 warning(gettext(WARN_MISSING_QUOTE_AT_EOL), linenum);
484 				}
485 				break;
486 
487 			case L'\"':
488 				quotefound = 1;
489 				break;
490 
491 			case L'\\':
492 				if ((mb = expand_meta(&linebufptr)) != NULL)
493 					bufptr[bufptr_index++] = mb;
494 				break;
495 
496 			default:
497 				if ((n = wctomb(&bufptr[bufptr_index], wc)) > 0)
498 					bufptr_index += n;
499 			} /* switch */
500 			if (quotefound) {
501 				/*
502 				 * Check if any remaining characters
503 				 * after closing quote.
504 				 */
505 				linebufptr = consume_whitespace(linebufptr);
506 				if (*linebufptr != L'\n') {
507 					warning(gettext(WARN_INVALID_STRING),
508 						linenum);
509 				}
510 				break;
511 			}
512 		} /* while */
513 
514 		bufptr[bufptr_index++] = '\0';
515 
516 		(void) strcpy(msgfile, gcurrent_domain);
517 		(void) strcat(msgfile, ".mo");
518 	} /* for(;;) */
519 
520 	if (inmsgstr) {
521 		sortit(gmsgid, gmsgstr);
522 	}
523 
524 	if (linebufhead)
525 		free(linebufhead);
526 	if (munmap(addr, statbuf.st_size) == -1) {
527 		error(gettext(ERR_MUNMAP_FAILED), filename);
528 		/* NOTREACHED */
529 	}
530 
531 	free(filename);
532 	return;
533 
534 } /* read_psffm */
535 
536 
/*
 * consume_whitespace - step over leading blanks and tabs.
 *
 * Returns a pointer to the first wide character of buf that is neither
 * L' ' nor L'\t'; this may be the terminating L'\0'.
 */
static wchar_t *
consume_whitespace(wchar_t *buf)
{
	wchar_t	*cur;

	for (cur = buf; *cur == L' ' || *cur == L'\t'; cur++)
		;
	return (cur);
} /* consume_white_space */
558 
559 
/*
 * expand_meta - translate the escape sequence that follows a backslash.
 *
 * buf	- address of a pointer to the wide character right after the
 *	  backslash.  On a recognized escape the pointer is advanced past
 *	  the characters that were used.
 *
 * Returns the byte the escape stands for.  Recognized are \" \\ \b \f \n
 * \r \t \v \a \' \? and \ddd with one to three octal digits; an octal value
 * above what a char holds is reduced to its low-order byte.  For any other
 * character '\0' is returned and *buf is left untouched, so the caller
 * processes that character as ordinary text.  Because '\0' is also the
 * value of an all-zero octal escape, the two cases look the same to the
 * caller.
 */
static char
expand_meta(wchar_t **buf)
{
	wchar_t		wc = **buf;
	unsigned int	value;
	int		ndigits;

	switch (wc) {
	case L'"':
		(*buf)++;
		return ('\"');
	case L'\\':
		(*buf)++;
		return ('\\');
	case L'b':
		(*buf)++;
		return ('\b');
	case L'f':
		(*buf)++;
		return ('\f');
	case L'n':
		(*buf)++;
		return ('\n');
	case L'r':
		(*buf)++;
		return ('\r');
	case L't':
		(*buf)++;
		return ('\t');
	case L'v':
		(*buf)++;
		return ('\v');
	case L'a':
		(*buf)++;
		return ('\a');
	case L'\'':
		(*buf)++;
		return ('\'');
	case L'?':
		(*buf)++;
		return ('\?');
	case L'0':
	case L'1':
	case L'2':
	case L'3':
	case L'4':
	case L'5':
	case L'6':
	case L'7':
		/*
		 * This case handles \ddd where ddd is octal number.
		 * There could be one, two, or three octal digits.
		 * Accumulate in an unsigned int so that no intermediate
		 * result depends on the signedness of char.
		 */
		value = 0;
		for (ndigits = 0; ndigits < 3; ndigits++) {
			wc = **buf;
			if (wc < L'0' || wc > L'7')
				break;
			value = 8 * value + (unsigned int)(wc - L'0');
			(*buf)++;
		}
		return ((char)(unsigned char)value);
	default:
		/* not an escape known here: consume nothing */
		return ('\0');
	}
} /* expand_meta */
632 
633 /*
634  * Finds the head of the current domain linked list and
635  * call insert_message() to insert msgid and msgstr pair
636  * to the linked list.
637  */
638 static void
639 sortit(char *msgid, char *msgstr)
640 {
641 	struct domain_struct	*dom;
642 
643 #ifdef DEBUG
644 	(void) fprintf(stderr,
645 		"==> sortit(), domain=<%s> msgid=<%s> msgstr=<%s>\n",
646 		gcurrent_domain, msgid, msgstr);
647 #endif
648 
649 	/*
650 	 * If "-o filename" is specified, then all "domain" directive
651 	 * are ignored and, all messages will be stored in domain
652 	 * whose name is filename.
653 	 */
654 	if (oflag) {
655 		dom = find_domain_node(outfile);
656 	} else {
657 		dom = find_domain_node(gcurrent_domain);
658 	}
659 
660 	insert_message(dom, msgid, msgstr);
661 }
662 
663 /*
664  * This routine inserts message in the current domain message list.
665  * It is inserted in ascending order.
666  */
667 static void
668 insert_message(struct domain_struct *dom,
669 	char *msgid, char *msgstr)
670 {
671 	struct msg_chain	*p1;
672 	struct msg_chain	*node, *prev_node;
673 	int			b;
674 
675 	/*
676 	 * Find the optimal starting search position.
677 	 * The starting search position is either the first node
678 	 * or the current_elem of domain.
679 	 * The current_elem is the pointer to the node which
680 	 * is most recently accessed in domain.
681 	 */
682 	if (dom->current_elem != NULL) {
683 		b = strcmp(msgid, dom->current_elem->msgid);
684 		if (b == 0) {
685 			if (verbose)
686 				warning(gettext(WARN_DUP_MSG),
687 					msgid, msgid_linenum);
688 			return;
689 		} else if (b > 0) { /* to implement descending order */
690 			p1 = dom->first_elem;
691 		} else {
692 			p1 = dom->current_elem;
693 		}
694 	} else {
695 		p1 = dom->first_elem;
696 	}
697 
698 	/*
699 	 * search msgid insert position in the list
700 	 * Search starts from the node pointed by p1.
701 	 */
702 	prev_node = NULL;
703 	while (p1) {
704 		b = strcmp(msgid, p1->msgid);
705 		if (b == 0) {
706 			if (verbose)
707 				warning(gettext(WARN_DUP_MSG),
708 					msgid, msgid_linenum);
709 			return;
710 		} else if (b < 0) {  /* to implement descending order */
711 			/* move to the next node */
712 			prev_node = p1;
713 			p1 = p1->next;
714 		} else {
715 			/* insert a new msg node */
716 			node = (struct msg_chain *)
717 				Xmalloc(sizeof (struct msg_chain));
718 			node->next = p1;
719 			node->msgid  = Xstrdup(msgid);
720 			node->msgstr = Xstrdup(msgstr);
721 
722 			if (prev_node) {
723 				prev_node->next = node;
724 			} else {
725 				dom->first_elem = node;
726 			}
727 			dom->current_elem = node;
728 			return;
729 		}
730 	} /* while */
731 
732 	/*
733 	 * msgid is smaller than any of msgid in the list or
734 	 * list is empty.
735 	 * Therefore, append it.
736 	 */
737 	node = (struct msg_chain *)
738 		Xmalloc(sizeof (struct msg_chain));
739 	node->next = NULL;
740 	node->msgid  = Xstrdup(msgid);
741 	node->msgstr = Xstrdup(msgstr);
742 
743 	if (prev_node) {
744 		prev_node->next = node;
745 	} else {
746 		dom->first_elem = node;
747 	}
748 	dom->current_elem = node;
749 
750 	return;
751 
752 } /* insert_message */
753 
754 
755 /*
756  * This routine will find head of the linked list for the given
757  * domain_name. This looks up cache entry first and if cache misses,
758  * scans the list.
759  * If not found, then create a new node.
760  */
761 static struct domain_struct *
762 find_domain_node(char *domain_name)
763 {
764 	struct domain_struct	*p1;
765 	struct domain_struct	*node;
766 	struct domain_struct	*prev_node;
767 	int			b;
768 
769 
770 	/* for perfomance, check cache 'last_used_domain' */
771 	if (last_used_domain) {
772 		b = strcmp(domain_name, last_used_domain->domain);
773 		if (b == 0) {
774 			return (last_used_domain);
775 		} else if (b < 0) {
776 			p1 = first_domain;
777 		} else {
778 			p1 = last_used_domain;
779 		}
780 	} else {
781 		p1 = first_domain;
782 	}
783 
784 	prev_node = NULL;
785 	while (p1) {
786 		b = strcmp(domain_name, p1->domain);
787 		if (b == 0) {
788 			/* node found */
789 			last_used_domain = p1;
790 			return (p1);
791 		} else if (b > 0) {
792 			/* move to the next node */
793 			prev_node = p1;
794 			p1 = p1->next;
795 		} else {
796 			/* insert a new domain node */
797 			node = (struct domain_struct *)
798 				Xmalloc(sizeof (struct domain_struct));
799 			node->next = p1;
800 			node->domain = Xstrdup(domain_name);
801 			node->first_elem = NULL;
802 			node->current_elem = NULL;
803 			if (prev_node) {
804 				/* insert the node in the middle */
805 				prev_node->next = node;
806 			} else {
807 				/* node inserted is the smallest */
808 				first_domain = node;
809 			}
810 			last_used_domain = node;
811 			return (node);
812 		}
813 	} /* while */
814 
815 	/*
816 	 * domain_name is larger than any of domain name in the list or
817 	 * list is empty.
818 	 */
819 	node = (struct domain_struct *)
820 		Xmalloc(sizeof (struct domain_struct));
821 	node->next = NULL;
822 	node->domain = Xstrdup(domain_name);
823 	node->first_elem = NULL;
824 	node->current_elem = NULL;
825 	if (prev_node) {
826 		/* domain list is not empty */
827 		prev_node->next = node;
828 	} else {
829 		/* domain list is empty */
830 		first_domain = node;
831 	}
832 	last_used_domain = node;
833 
834 	return (node);
835 
836 } /* find_domain_node */
837 
838 
839 /*
840  * binary_compute() is used for pre-computing a binary search.
841  */
842 static int
843 binary_compute(int i, int j, int *more, int *less)
844 {
845 	int	k;
846 
847 	if (i > j) {
848 		return (LEAFINDICATOR);
849 	}
850 	k = (i + j) / 2;
851 
852 	less[k] = binary_compute(i, k - 1, more, less);
853 	more[k] = binary_compute(k + 1, j, more, less);
854 
855 	return (k);
856 
857 } /* binary_compute */
858 
859 
860 /*
861  * Write all domain data to file.
862  * Each domain will create one file.
863  */
864 static void
865 output_all_mo_files(void)
866 {
867 	struct domain_struct 	*p;
868 
869 	p = first_domain;
870 	while (p) {
871 		/*
872 		 * generate message object file only if there is
873 		 * at least one element.
874 		 */
875 		if (p->first_elem) {
876 			output_one_mo_file(p);
877 		}
878 		p = p->next;
879 	}
880 	return;
881 
882 } /* output_all_mo_files */
883 
884 
885 /*
886  * Write one domain data list to file.
887  */
888 static void
889 output_one_mo_file(struct domain_struct *dom)
890 {
891 	FILE	*fp;
892 	struct msg_chain	*p;
893 	int	message_count;
894 	int	string_count_msgid;
895 	int	string_count_msg;
896 	int	msgid_index = 0;
897 	int	msgstr_index = 0;
898 	int	*less, *more;
899 	int	i;
900 	char	fname [TEXTDOMAINMAX+1];
901 
902 	if (!dom || !dom->first_elem)
903 		return;
904 
905 	/*
906 	 * If -o flag is specified, then file name is used as domain name.
907 	 * If not, ".mo" is appended to the domain name.
908 	 */
909 	(void) strcpy(fname, dom->domain);
910 	if (!oflag) {
911 		(void) strcat(fname, ".mo");
912 	}
913 	fp = fopen(fname, "w");
914 	if (fp == NULL) {
915 		error(gettext(ERR_OPEN_FAILED), fname);
916 		/* NOTREACHED */
917 	}
918 
919 	/* compute offsets and counts */
920 	message_count = 0;
921 	p = dom->first_elem;
922 	while (p) {
923 		p->msgid_offset = msgid_index;
924 		p->msgstr_offset = msgstr_index;
925 		msgid_index += strlen(p->msgid) + 1;
926 		msgstr_index += strlen(p->msgstr) + 1;
927 		message_count++;
928 		p = p->next;
929 	}
930 
931 	/*
932 	 * Fill up less and more entries to be used for binary search.
933 	 */
934 	string_count_msgid = msgid_index;
935 	string_count_msg = msgstr_index;
936 	less = (int *)Xcalloc(message_count, sizeof (int));
937 	more = (int *)Xcalloc(message_count, sizeof (int));
938 
939 	(void) binary_compute(0, message_count - 1, more, less);
940 
941 #ifdef DEBUG
942 	{
943 		int i;
944 		for (i = 0; i < message_count; i++) {
945 			(void) fprintf(stderr,
946 				"  less[%2d]=%2d, more[%2d]=%2d\n",
947 				i, less[i], i, more[i]);
948 		}
949 	}
950 #endif
951 
952 	/*
953 	 * write out the message object file.
954 	 * The middle one is the first message to check by gettext().
955 	 */
956 	i = (message_count - 1) / 2;
957 	(void) fwrite(&i, sizeof (int), 1, fp);
958 	(void) fwrite(&message_count, sizeof (int), 1, fp);
959 	(void) fwrite(&string_count_msgid, sizeof (int), 1, fp);
960 	(void) fwrite(&string_count_msg, sizeof (int), 1, fp);
961 	i = MSG_STRUCT_SIZE * message_count;
962 	(void) fwrite(&i, sizeof (int), 1, fp);
963 
964 	/* march through linked list and write out all nodes. */
965 	i = 0;
966 	p = dom->first_elem;
967 	while (p) {	/* put out message struct */
968 		(void) fwrite(&less[i], sizeof (int), 1, fp);
969 		(void) fwrite(&more[i], sizeof (int), 1, fp);
970 		(void) fwrite(&p->msgid_offset, sizeof (int), 1, fp);
971 		(void) fwrite(&p->msgstr_offset, sizeof (int), 1, fp);
972 		i++;
973 		p = p->next;
974 	}
975 
976 	/* put out message id strings */
977 	p = dom->first_elem;
978 	while (p) {
979 		(void) fwrite(p->msgid, strlen(p->msgid)+1, 1, fp);
980 		p = p->next;
981 	}
982 
983 	/* put out message strings */
984 	p = dom->first_elem;
985 	while (p) {
986 		(void) fwrite(p->msgstr, strlen(p->msgstr)+1, 1, fp);
987 		p = p->next;
988 	}
989 
990 	(void) fclose(fp);
991 	free(less);
992 	free(more);
993 
994 	return;
995 
996 } /* output_one_mo_file */
997 
998 
999 /*
1000  * read one line from *mbuf,
1001  * skip preceding whitespaces,
1002  * convert the line to wide characters,
1003  * place the wide characters into *bufhead, and
1004  * return the number of wide characters placed.
1005  *
1006  * INPUT:
1007  *		**bufhead - address of a variable that is the pointer
1008  *			to wchar_t.
1009  *			The variable should been initialized to NULL.
1010  *		**mbuf - address of a variable that is the pointer
1011  *			to char.
1012  *			The pointer should point to the memory mmapped to
1013  *			the file to input.
1014  *		**fsize - address of a size_t variable that contains
1015  *			the size of unread bytes in the file to input.
1016  * OUTPUT:
1017  *		return - the number of wide characters placed.
1018  *		**bufhead - _mbsntowcs allocates the buffer to store
1019  *			one line in wchar_t from *mbuf and sets the address
1020  *			to *bufhead.
1021  *		**mbuf - _mbsntowcs reads one line from *mbuf and sets *mbuf
1022  *			to the beginning of the next line.
1023  *		**fsize - *fsize will be set to the size of the unread
1024  *			bytes in the file.
1025  */
1026 static size_t
1027 _mbsntowcs(wchar_t **bufhead, char **mbuf, size_t *fsize)
1028 {
1029 	wchar_t	*tp, *th;
1030 	wchar_t	wc;
1031 	size_t	tbufsize = LINE_SIZE;
1032 	size_t	ttbufsize, nc;
1033 	char	*pc = *mbuf;
1034 	int	nb;
1035 
1036 	if (*fsize == 0) {
1037 		/* eof */
1038 		return (0);
1039 	}
1040 
1041 	th = (wchar_t *)Xmalloc(sizeof (wchar_t) * tbufsize);
1042 	nc = tbufsize;
1043 
1044 	/* skip preceding whitespaces */
1045 	while ((*pc != '\0')) {
1046 		if ((*pc == ' ') || (*pc == '\t')) {
1047 			pc++;
1048 			(*fsize)--;
1049 		} else {
1050 			break;
1051 		}
1052 	}
1053 
1054 	tp = th;
1055 	while (*fsize > 0) {
1056 		nb = mbtowc(&wc, pc, mbcurmax);
1057 		if (nb == -1) {
1058 			return ((size_t)-1);
1059 		}
1060 
1061 		if (*pc == '\n') {
1062 			/* found eol */
1063 			if (nc <= 1) {
1064 				/*
1065 				 * not enough buffer
1066 				 * at least 2 more bytes are required for
1067 				 * L'\n' and L'\0'
1068 				 */
1069 				ttbufsize = tbufsize + 2;
1070 				th = (wchar_t *)Xrealloc(th,
1071 					sizeof (wchar_t) * ttbufsize);
1072 				tp = th + tbufsize - nc;
1073 				tbufsize = ttbufsize;
1074 			}
1075 			*tp++ = L'\n';
1076 			*tp++ = L'\0';
1077 			pc += nb;
1078 			*fsize -= nb;
1079 			*mbuf = pc;
1080 			*bufhead = th;
1081 			return ((size_t)(tp - th));
1082 		}
1083 		if (nc == 0) {
1084 			ttbufsize = tbufsize + LINE_SIZE;
1085 			th = (wchar_t *)Xrealloc(th,
1086 				sizeof (wchar_t) * ttbufsize);
1087 			tp = th + tbufsize;
1088 			nc = LINE_SIZE;
1089 			tbufsize = ttbufsize;
1090 		}
1091 		*tp++ = wc;
1092 		nc--;
1093 		pc += nb;
1094 		*fsize -= nb;
1095 	}	/* while */
1096 
1097 	/*
1098 	 * At this point, the input file has been consumed,
1099 	 * but there is no ending '\n'; we add it to
1100 	 * the output file.
1101 	 */
1102 	if (nc <= 1) {
1103 		/*
1104 		 * not enough buffer
1105 		 * at least 2 more bytes are required for
1106 		 * L'\n' and L'\0'
1107 		 */
1108 		ttbufsize = tbufsize + 2;
1109 		th = (wchar_t *)Xrealloc(th,
1110 			sizeof (wchar_t) * ttbufsize);
1111 		tp = th + tbufsize - nc;
1112 		tbufsize = ttbufsize;
1113 	}
1114 	*tp++ = L'\n';
1115 	*tp++ = L'\0';
1116 	*mbuf = pc;
1117 	*bufhead = th;
1118 	return ((size_t)(tp - th));
1119 }
1120 
1121 
1122 /*
1123  * This is debug function. Not compiled in the final executable.
1124  */
1125 #ifdef DEBUG
1126 static void
1127 printlist(void)
1128 {
1129 	struct domain_struct	*p;
1130 	struct msg_chain	*m;
1131 
1132 	(void) fprintf(stderr, "\n=== Printing contents of all domains ===\n");
1133 	p = first_domain;
1134 	while (p) {
1135 		(void) fprintf(stderr, "domain name = <%s>\n", p->domain);
1136 		m = p->first_elem;
1137 		while (m) {
1138 			(void) fprintf(stderr, "   msgid=<%s>, msgstr=<%s>\n",
1139 					m->msgid, m->msgstr);
1140 			m = m->next;
1141 		}
1142 		p = p->next;
1143 	}
1144 } /* printlist */
1145 #endif
1146