1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 1991, 1999, 2001-2002 Sun Microsystems, Inc.
24 * All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 #include <ctype.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
/* Boolean values used by every option flag and parser-state variable. */
#define TRUE 1
#define FALSE 0
/* Size of curr_file, the buffer holding the current input file name. */
#define MAX_PATH_LEN 1024
/* Size of curr_domain, the buffer holding the current domain name. */
#define MAX_DOMAIN_LEN 1024
/* Size of the line buffers (curr_line, qstring_buf, exclude-file lines). */
#define MAX_STRING_LEN 2048
38
39 #define USAGE "Usage: xgettext [-a [-x exclude-file]] [-jns]\
40 [-c comment-tag]\n [-d default-domain] [-m prefix] \
41 [-M suffix] [-p pathname] files ...\n\
42 xgettext -h\n"
43
/* Initial value of default_domain; -d replaces it (see main). */
#define DEFAULT_DOMAIN "messages"

/*
 * Scanner interface.  yytext holds the text of the token currently being
 * handled and yylex() runs the scanner.  Both are defined outside this
 * file (presumably by the lex-generated scanner -- confirm).
 */
extern char yytext[];
extern int yylex(void);
48
/*
 * Singly linked list of strings used to hold one ANSI-C style string.
 * Each quoted piece of the string is stored in its own node.
 *
 * str		: text of this piece; may be NULL (readers skip such nodes).
 * next		: next piece, or NULL at the end of the list.
 */
struct strlist_st {
	char *str;
	struct strlist_st *next;
};
57
/*
 * One extracted message (or textdomain() call).
 *
 * istextdomain	: Boolean telling if this node contains a textdomain call.
 * isduplicate	: Boolean telling if this node is a duplicate of another
 *		  msgid.
 * msgid	: contains msgid, or the textdomain if istextdomain is true.
 * msgstr	: contains msgstr.
 * comment	: comment extracted in case of -c option.
 * fname	: tells which file contains msgid.
 * linenum	: line number in the file.
 * next		: Next node.
 */
struct element_st {
	char istextdomain;
	char isduplicate;
	struct strlist_st *msgid;
	struct strlist_st *msgstr;
	struct strlist_st *comment;
	char *fname;
	int linenum;
	struct element_st *next;
};
78
/*
 * One message domain and the elements collected for it.
 *
 * dname		: domain name. NULL if default domain.
 * gettext_head		: Head of linked list containing [d]gettext().
 * gettext_tail		: Tail of linked list containing [d]gettext().
 * textdomain_head	: Head of linked list containing textdomain().
 * textdomain_tail	: Tail of linked list containing textdomain().
 * next			: Next node.
 *
 * Each domain contains two linked lists (gettext_head, textdomain_head).
 * If the -s option is used, textdomain_head contains all textdomain()
 * calls and no textdomain() calls are stored in gettext_head.
 * If the -s option is not used, textdomain_head is an empty list and
 * gettext_head contains all gettext(), dgettext(), and textdomain() calls.
 */
struct domain_st {
	char *dname;
	struct element_st *gettext_head;
	struct element_st *gettext_tail;
	struct element_st *textdomain_head;
	struct element_st *textdomain_tail;
	struct domain_st *next;
};
102
/*
 * There are two domain linked lists.
 * def_dom contains the default domain linked list, and
 * dom_head/dom_tail contain all other domain linked lists, which are
 * created by dgettext() calls.
 */
static struct domain_st *def_dom = NULL;
static struct domain_st *dom_head = NULL;
static struct domain_st *dom_tail = NULL;
112
/*
 * This linked list contains the strings to be excluded when the
 * -x option is used.  It is filled by read_exclude_file(), which
 * prepends one node per msgid found in the exclude file.
 *
 * exstr	: the excluded msgid, one node per line of the msgid.
 * next		: next excluded msgid.
 */
static struct exclude_st {
	struct strlist_st *exstr;
	struct exclude_st *next;
} *excl_head;
121
/*
 * All option flags, and the value of each option that takes one.
 * They are set by the getopt() loop in main().
 */
static int aflg = FALSE;		/* -a : find all strings */
static int cflg = FALSE;		/* -c : extract tagged comments */
static char *comment_tag = NULL;	/* argument of -c */
static char *default_domain = NULL;	/* argument of -d, else default */
static int hflg = FALSE;		/* -h : print help and exit */
static int jflg = FALSE;		/* -j : update existing file */
static int mflg = FALSE;		/* -m : msgstr gets a prefix */
static int Mflg = FALSE;		/* -M : msgstr gets a suffix */
static char *suffix = NULL;		/* argument of -M */
static char *prefix = NULL;		/* argument of -m */
static int nflg = FALSE;		/* -n : file/line info in output */
static int pflg = FALSE;		/* -p : output directory given */
static char *pathname = NULL;		/* argument of -p */
static int sflg = FALSE;		/* -s : sorted output */
static int tflg = FALSE; /* Undocumented option to extract dcgettext */
static int xflg = FALSE;		/* -x : exclude file given */
static char *exclude_file = NULL;	/* argument of -x */
142
/*
 * Each variable shows the current state of parsing the input file.
 * All of them hold TRUE or FALSE except num_nested_open_paren.
 *
 * in_comment		: Means inside comment block (C or C++).
 * in_cplus_comment	: Means inside C++ comment block.
 * in_gettext		: Means inside gettext call.
 * in_dgettext		: Means inside dgettext call.
 * in_dcgettext		: Means inside dcgettext call.
 * in_textdomain	: Means inside textdomain call.
 * in_str		: Means currently processing ANSI style string.
 * in_quote		: Means currently processing double quoted string.
 * in_skippable_string	: Means currently processing double quoted string,
 *			that occurs outside a call to gettext, dgettext,
 *			dcgettext, textdomain, with -a not specified.
 * is_last_comment_line	: Means the current line is the last line
 *			of the comment block. This is necessary because
 *			in_comment becomes FALSE when '* /' is encountered.
 * is_first_comma_found	: This is used only for dcgettext because dcgettext()
 *			requires 2 commas. So a different action is needed
 *			depending on which comma is encountered.
 * num_nested_open_paren : This keeps track of the number of open parens to
 *			handle dcgettext ((const char *)0,"msg",LC_TIME);
 */
static int in_comment = FALSE;
static int in_cplus_comment = FALSE;
static int in_gettext = FALSE;
static int in_dgettext = FALSE;
static int in_dcgettext = FALSE;
static int in_textdomain = FALSE;
static int in_str = FALSE;
static int in_quote = FALSE;
static int is_last_comment_line = FALSE;
static int is_first_comma_found = FALSE;
static int in_skippable_string = FALSE;
static int num_nested_open_paren = 0;
178
/*
 * This variable contains the line number on which the current gettext(),
 * dgettext(), dcgettext() or textdomain() call (or, with -a, the current
 * quoted string) started.
 * This is necessary because a single call may span multiple lines and
 * the starting line is the one to be stored.
 */
static int linenum_saved = 0;

/* TRUE when the first file operand is "-" (set in main). */
int stdin_only = FALSE; /* Read input from stdin */
188
/*
 * curr_file	: Contains the name of the file currently processed.
 * curr_domain	: Contains the current domain for each dgettext().
 *		This is the empty string for gettext().
 * curr_line	: Contains the text of the current line processed
 *		(only collected when -c is used).
 * qstring_buf	: Contains the double quoted string processed.
 * curr_linenum	: Line number being processed in the current input file.
 * warn_linenum	: Line number of current warning message.
 */
char curr_file[MAX_PATH_LEN];
static char curr_domain[MAX_DOMAIN_LEN];
static char curr_line[MAX_STRING_LEN];
static char qstring_buf[MAX_STRING_LEN];
int curr_linenum = 1;
int warn_linenum = 0;
204
/*
 * strhead	: This list contains the ANSI style string being collected.
 *		Each node contains one double quoted string.
 * strtail	: This is the tail of strhead.
 * commhead	: This list contains the comment being collected.
 *		Each node contains one line of comment.
 * commtail	: This is the tail of commhead.
 */
static struct strlist_st *strhead = NULL;
static struct strlist_st *strtail = NULL;
static struct strlist_st *commhead = NULL;
static struct strlist_st *commtail = NULL;
217
/*
 * gargc	: Same as argc. Used to pass argc to the lex routine.
 * gargv	: Same as argv. Used to pass argv to the lex routine.
 * Both are assigned in main() before yylex() is called.
 */
int gargc;
char **gargv;
224
/*
 * Forward declarations.  handle_newline() is not static because it is
 * also one of the scanner's token handlers.
 */
static void add_line_to_comment(void);
static void add_qstring_to_str(void);
static void add_str_to_element_list(int, char *);
static void copy_strlist_to_str(char *, struct strlist_st *);
static void end_ansi_string(void);
static void free_strlist(struct strlist_st *);
void handle_newline(void);
static void initialize_globals(void);
static void output_comment(FILE *, struct strlist_st *);
static void output_msgid(FILE *, struct strlist_st *, int);
static void output_textdomain(FILE *, struct element_st *);
static void print_help(void);
static void read_exclude_file(void);
static void trim_line(char *);
static void write_all_files(void);
static void write_one_file(struct domain_st *);

/* String append used for all fixed-size buffers in this file. */
static void lstrcat(char *, const char *);

/*
 * Utility functions to malloc a node and initialize fields.
 */
static struct domain_st *new_domain(void);
static struct strlist_st *new_strlist(void);
static struct element_st *new_element(void);
static struct exclude_st *new_exclude(void);
251
252 /*
253 * Main program of xgettext.
254 */
255 int
main(int argc,char ** argv)256 main(int argc, char **argv)
257 {
258 int opterr = FALSE;
259 int c;
260
261 initialize_globals();
262
263 while ((c = getopt(argc, argv, "jhax:nsc:d:m:M:p:t")) != EOF) {
264 switch (c) {
265 case 'a':
266 aflg = TRUE;
267 break;
268 case 'c':
269 cflg = TRUE;
270 comment_tag = optarg;
271 break;
272 case 'd':
273 default_domain = optarg;
274 break;
275 case 'h':
276 hflg = TRUE;
277 break;
278 case 'j':
279 jflg = TRUE;
280 break;
281 case 'M':
282 Mflg = TRUE;
283 suffix = optarg;
284 break;
285 case 'm':
286 mflg = TRUE;
287 prefix = optarg;
288 break;
289 case 'n':
290 nflg = TRUE;
291 break;
292 case 'p':
293 pflg = TRUE;
294 pathname = optarg;
295 break;
296 case 's':
297 sflg = TRUE;
298 break;
299 case 't':
300 tflg = TRUE;
301 break;
302 case 'x':
303 xflg = TRUE;
304 exclude_file = optarg;
305 break;
306 case '?':
307 opterr = TRUE;
308 break;
309 }
310 }
311
312 /* if -h is used, ignore all other options. */
313 if (hflg == TRUE) {
314 (void) fprintf(stderr, USAGE);
315 print_help();
316 exit(0);
317 }
318
319 /* -x can be used only with -a */
320 if ((xflg == TRUE) && (aflg == FALSE))
321 opterr = TRUE;
322
323 /* -j cannot be used with -a */
324 if ((jflg == TRUE) && (aflg == TRUE)) {
325 (void) fprintf(stderr,
326 "-a and -j options cannot be used together.\n");
327 opterr = TRUE;
328 }
329
330 /* -j cannot be used with -s */
331 if ((jflg == TRUE) && (sflg == TRUE)) {
332 (void) fprintf(stderr,
333 "-j and -s options cannot be used together.\n");
334 opterr = TRUE;
335 }
336
337 if (opterr == TRUE) {
338 (void) fprintf(stderr, USAGE);
339 exit(2);
340 }
341
342 /* error, if no files are specified. */
343 if (optind == argc) {
344 (void) fprintf(stderr, USAGE);
345 exit(2);
346 }
347
348 if (xflg == TRUE) {
349 read_exclude_file();
350 }
351
352 /* If files are -, then read from stdin */
353 if (argv[optind][0] == '-') {
354 stdin_only = TRUE;
355 optind++;
356 } else {
357 stdin_only = FALSE;
358 }
359
360 /* Store argc and argv to pass to yylex() */
361 gargc = argc;
362 gargv = argv;
363
364 #ifdef DEBUG
365 (void) printf("optind=%d\n", optind);
366 {
367 int i = optind;
368 for (; i < argc; i++) {
369 (void) printf(" %d, <%s>\n", i, argv[i]);
370 }
371 }
372 #endif
373
374 if (stdin_only == FALSE) {
375 if (freopen(argv[optind], "r", stdin) == NULL) {
376 (void) fprintf(stderr,
377 "ERROR, can't open input file: %s\n", argv[optind]);
378 exit(2);
379 }
380 (void) strcpy(curr_file, gargv[optind]);
381 optind++;
382 }
383
384 /*
385 * Process input.
386 */
387 (void) yylex();
388
389 #ifdef DEBUG
390 printf("\n======= default_domain ========\n");
391 print_one_domain(def_dom);
392 printf("======= domain list ========\n");
393 print_all_domain(dom_head);
394 #endif
395
396 /*
397 * Write out all .po files.
398 */
399 write_all_files();
400
401 return (0);
402 } /* main */
403
/*
 * Writes the one-line description of every option to stderr.
 * Called for -h, after the usage message has been printed.
 */
static void
print_help(void)
{
	static const char *const help_lines[] = {
		"\n",
		"-a\t\t\tfind ALL strings\n",
		"-c <comment-tag>\tget comments containing <flag>\n",
		"-d <default-domain>\tuse <default-domain> for default domain\n",
		"-h\t\t\tHelp\n",
		"-j\t\t\tupdate existing file with the current result\n",
		"-M <suffix>\t\tfill in msgstr with msgid<suffix>\n",
		"-m <prefix>\t\tfill in msgstr with <prefix>msgid\n",
		"-n\t\t\tline# file name and line number info in output\n",
		"-p <pathname>\t\tuse <pathname> for output file directory\n",
		"-s\t\t\tgenerate sorted output files\n",
		"-x <exclude-file>\texclude strings in file "
		    "<exclude-file> from output\n",
		"-\t\t\tread stdin, use as a filter (input only)\n"
	};
	size_t	idx;

	for (idx = 0; idx < sizeof (help_lines) / sizeof (help_lines[0]);
	    idx++) {
		(void) fprintf(stderr, "%s", help_lines[idx]);
	}
} /* print_help */
434
435 /*
436 * Extract file name and line number information from macro line
437 * and set the global variable accordingly.
438 * The valid line format is
439 * 1) # nnn
440 * or
441 * 2) # nnn "xxxxx"
442 * where nnn is line number and xxxxx is file name.
443 */
444 static void
extract_filename_linenumber(char * mline)445 extract_filename_linenumber(char *mline)
446 {
447 int num;
448 char *p, *q, *r;
449
450 /*
451 * mline can contain multi newline.
452 * line number should be increased by the number of newlines.
453 */
454 p = mline;
455 while ((p = strchr(p, '\n')) != NULL) {
456 p++;
457 curr_linenum++;
458 }
459 p = strchr(mline, ' ');
460 if (p == NULL)
461 return;
462 q = strchr(++p, ' ');
463 if (q == NULL) {
464 /* case 1 */
465 if ((num = atoi(p)) > 0) {
466 curr_linenum = num;
467 return;
468 }
469 } else {
470 /* case 2 */
471 *q++ = 0;
472 if (*q == '"') {
473 q++;
474 r = strchr(q, '"');
475 if (r == NULL) {
476 return;
477 }
478 *r = 0;
479 if ((num = atoi(p)) > 0) {
480 curr_linenum = num;
481 (void) strcpy(curr_file, q);
482 }
483 }
484 }
485 } /* extract_filename_linenumber */
486
487 /*
488 * Handler for MACRO line which starts with #.
489 */
490 void
handle_macro_line(void)491 handle_macro_line(void)
492 {
493 #ifdef DEBUG
494 (void) printf("Macro line=<%s>\n", yytext);
495 #endif
496 if (cflg == TRUE)
497 lstrcat(curr_line, yytext);
498
499 if (in_quote == TRUE) {
500 lstrcat(qstring_buf, yytext);
501 } else if (in_comment == FALSE) {
502 extract_filename_linenumber(yytext);
503 }
504
505 curr_linenum--;
506 handle_newline();
507 } /* handle_macro_line */
508
509 /*
510 * Handler for C++ comments which starts with //.
511 */
512 void
handle_cplus_comment_line(void)513 handle_cplus_comment_line(void)
514 {
515 if (cflg == TRUE)
516 lstrcat(curr_line, yytext);
517
518 if (in_quote == TRUE) {
519 lstrcat(qstring_buf, yytext);
520 } else if ((in_comment == FALSE) &&
521 (in_skippable_string == FALSE)) {
522
523 /*
524 * If already in c comments, don't do anything.
525 * Set both flags to TRUE here.
526 * Both flags will be set to FALSE when newline
527 * encounters.
528 */
529 in_cplus_comment = TRUE;
530 in_comment = TRUE;
531 }
532 } /* handle_cplus_comment_line */
533
534 /*
535 * Handler for the comment start (slash asterisk) in input file.
536 */
537 void
handle_open_comment(void)538 handle_open_comment(void)
539 {
540 if (cflg == TRUE)
541 lstrcat(curr_line, yytext);
542
543 if (in_quote == TRUE) {
544 lstrcat(qstring_buf, yytext);
545 } else if ((in_comment == FALSE) &&
546 (in_skippable_string == FALSE)) {
547
548 in_comment = TRUE;
549 is_last_comment_line = FALSE;
550 /*
551 * If there is any comment extracted before accidently,
552 * clean it up and start the new comment again.
553 */
554 free_strlist(commhead);
555 commhead = commtail = NULL;
556 }
557 }
558
559 /*
560 * Handler for the comment end (asterisk slash) in input file.
561 */
562 void
handle_close_comment(void)563 handle_close_comment(void)
564 {
565 if (cflg == TRUE)
566 lstrcat(curr_line, yytext);
567
568 if (in_quote == TRUE) {
569 lstrcat(qstring_buf, yytext);
570 } else if (in_skippable_string == FALSE) {
571 in_comment = FALSE;
572 is_last_comment_line = TRUE;
573 }
574 }
575
576 /*
577 * Handler for "gettext" in input file.
578 */
579 void
handle_gettext(void)580 handle_gettext(void)
581 {
582 /*
583 * If -t option is specified to extrct dcgettext,
584 * don't do anything for gettext().
585 */
586 if (tflg == TRUE) {
587 return;
588 }
589
590 num_nested_open_paren = 0;
591
592 if (cflg == TRUE)
593 lstrcat(curr_line, yytext);
594
595 if (in_quote == TRUE) {
596 lstrcat(qstring_buf, yytext);
597 } else if (in_comment == FALSE) {
598 in_gettext = TRUE;
599 linenum_saved = curr_linenum;
600 /*
601 * gettext will be put into default domain .po file
602 * curr_domain does not change for gettext.
603 */
604 curr_domain[0] = '\0';
605 }
606 } /* handle_gettext */
607
608 /*
609 * Handler for "dgettext" in input file.
610 */
611 void
handle_dgettext(void)612 handle_dgettext(void)
613 {
614 /*
615 * If -t option is specified to extrct dcgettext,
616 * don't do anything for dgettext().
617 */
618 if (tflg == TRUE) {
619 return;
620 }
621
622 num_nested_open_paren = 0;
623
624 if (cflg == TRUE)
625 lstrcat(curr_line, yytext);
626
627 if (in_quote == TRUE) {
628 lstrcat(qstring_buf, yytext);
629 } else if (in_comment == FALSE) {
630 in_dgettext = TRUE;
631 linenum_saved = curr_linenum;
632 /*
633 * dgettext will be put into domain file specified.
634 * curr_domain will follow.
635 */
636 curr_domain[0] = '\0';
637 }
638 } /* handle_dgettext */
639
640 /*
641 * Handler for "dcgettext" in input file.
642 */
643 void
handle_dcgettext(void)644 handle_dcgettext(void)
645 {
646 /*
647 * dcgettext will be extracted only when -t flag is specified.
648 */
649 if (tflg == FALSE) {
650 return;
651 }
652
653 num_nested_open_paren = 0;
654
655 is_first_comma_found = FALSE;
656
657 if (cflg == TRUE)
658 lstrcat(curr_line, yytext);
659
660 if (in_quote == TRUE) {
661 lstrcat(qstring_buf, yytext);
662 } else if (in_comment == FALSE) {
663 in_dcgettext = TRUE;
664 linenum_saved = curr_linenum;
665 /*
666 * dcgettext will be put into domain file specified.
667 * curr_domain will follow.
668 */
669 curr_domain[0] = '\0';
670 }
671 } /* handle_dcgettext */
672
673 /*
674 * Handler for "textdomain" in input file.
675 */
676 void
handle_textdomain(void)677 handle_textdomain(void)
678 {
679 if (cflg == TRUE)
680 lstrcat(curr_line, yytext);
681
682 if (in_quote == TRUE) {
683 lstrcat(qstring_buf, yytext);
684 } else if (in_comment == FALSE) {
685 in_textdomain = TRUE;
686 linenum_saved = curr_linenum;
687 curr_domain[0] = '\0';
688 }
689 } /* handle_textdomain */
690
691 /*
692 * Handler for '(' in input file.
693 */
694 void
handle_open_paren(void)695 handle_open_paren(void)
696 {
697 if (cflg == TRUE)
698 lstrcat(curr_line, yytext);
699
700 if (in_quote == TRUE) {
701 lstrcat(qstring_buf, yytext);
702 } else if (in_comment == FALSE) {
703 if ((in_gettext == TRUE) ||
704 (in_dgettext == TRUE) ||
705 (in_dcgettext == TRUE) ||
706 (in_textdomain == TRUE)) {
707 in_str = TRUE;
708 num_nested_open_paren++;
709 }
710 }
711 } /* handle_open_paren */
712
713 /*
714 * Handler for ')' in input file.
715 */
716 void
handle_close_paren(void)717 handle_close_paren(void)
718 {
719 if (cflg == TRUE)
720 lstrcat(curr_line, yytext);
721
722 if (in_quote == TRUE) {
723 lstrcat(qstring_buf, yytext);
724 } else if (in_comment == FALSE) {
725 if ((in_gettext == TRUE) ||
726 (in_dgettext == TRUE) ||
727 (in_dcgettext == TRUE) ||
728 (in_textdomain == TRUE)) {
729 /*
730 * If this is not the matching close paren with
731 * the first open paren, no action is necessary.
732 */
733 if (--num_nested_open_paren > 0)
734 return;
735 add_str_to_element_list(in_textdomain, curr_domain);
736 in_str = FALSE;
737 in_gettext = FALSE;
738 in_dgettext = FALSE;
739 in_dcgettext = FALSE;
740 in_textdomain = FALSE;
741 } else if (aflg == TRUE) {
742 end_ansi_string();
743 }
744 }
745 } /* handle_close_paren */
746
747 /*
748 * Handler for '\\n' in input file.
749 *
750 * This is a '\' followed by new line.
751 * This can be treated like a new line except when this is a continuation
752 * of a ANSI-C string.
753 * If this is a part of ANSI string, treat the current line as a double
754 * quoted string and the next line is the start of the double quoted
755 * string.
756 */
757 void
handle_esc_newline(void)758 handle_esc_newline(void)
759 {
760 if (cflg == TRUE)
761 lstrcat(curr_line, "\\");
762
763 curr_linenum++;
764
765 if (in_quote == TRUE) {
766 add_qstring_to_str();
767 } else if ((in_comment == TRUE) ||
768 (is_last_comment_line == TRUE)) {
769 if (in_cplus_comment == FALSE) {
770 add_line_to_comment();
771 }
772 }
773
774 curr_line[0] = '\0';
775 } /* handle_esc_newline */
776
777 /*
778 * Handler for '"' in input file.
779 */
780 void
handle_quote(void)781 handle_quote(void)
782 {
783 if (cflg == TRUE)
784 lstrcat(curr_line, yytext);
785
786 if (in_comment == TRUE) {
787 /*EMPTY*/
788 } else if ((in_gettext == TRUE) ||
789 (in_dgettext == TRUE) ||
790 (in_dcgettext == TRUE) ||
791 (in_textdomain == TRUE)) {
792 if (in_str == TRUE) {
793 if (in_quote == FALSE) {
794 in_quote = TRUE;
795 } else {
796 add_qstring_to_str();
797 in_quote = FALSE;
798 }
799 }
800 } else if (aflg == TRUE) {
801 /*
802 * The quote is found outside of gettext, dgetext, and
803 * textdomain. Everytime a quoted string is found,
804 * add it to the string list.
805 * in_str stays TRUE until ANSI string ends.
806 */
807 if (in_str == TRUE) {
808 if (in_quote == TRUE) {
809 in_quote = FALSE;
810 add_qstring_to_str();
811 } else {
812 in_quote = TRUE;
813 }
814 } else {
815 in_str = TRUE;
816 in_quote = TRUE;
817 linenum_saved = curr_linenum;
818 }
819 } else {
820 in_skippable_string = (in_skippable_string == TRUE) ?
821 FALSE : TRUE;
822 }
823 } /* handle_quote */
824
825 /*
826 * Handler for ' ' or TAB in input file.
827 */
828 void
handle_spaces(void)829 handle_spaces(void)
830 {
831 if (cflg == TRUE)
832 lstrcat(curr_line, yytext);
833
834 if (in_quote == TRUE) {
835 lstrcat(qstring_buf, yytext);
836 }
837 } /* handle_spaces */
838
839 /*
840 * Flattens a linked list containing ANSI string to the one string.
841 */
842 static void
copy_strlist_to_str(char * str,struct strlist_st * strlist)843 copy_strlist_to_str(char *str, struct strlist_st *strlist)
844 {
845 struct strlist_st *p;
846
847 str[0] = '\0';
848
849 if (strlist != NULL) {
850 p = strlist;
851 while (p != NULL) {
852 if (p->str != NULL) {
853 lstrcat(str, p->str);
854 }
855 p = p->next;
856 }
857 }
858 } /* copy_strlist_to_str */
859
860 /*
861 * Handler for ',' in input file.
862 */
863 void
handle_comma(void)864 handle_comma(void)
865 {
866 if (cflg == TRUE)
867 lstrcat(curr_line, yytext);
868
869 if (in_quote == TRUE) {
870 lstrcat(qstring_buf, yytext);
871 } else if (in_comment == FALSE) {
872 if (in_str == TRUE) {
873 if (in_dgettext == TRUE) {
874 copy_strlist_to_str(curr_domain, strhead);
875 free_strlist(strhead);
876 strhead = strtail = NULL;
877 } else if (in_dcgettext == TRUE) {
878 /*
879 * Ignore the second comma.
880 */
881 if (is_first_comma_found == FALSE) {
882 copy_strlist_to_str(curr_domain,
883 strhead);
884 free_strlist(strhead);
885 strhead = strtail = NULL;
886 is_first_comma_found = TRUE;
887 }
888 } else if (aflg == TRUE) {
889 end_ansi_string();
890 }
891 }
892 }
893 } /* handle_comma */
894
895 /*
896 * Handler for any other character that does not have special handler.
897 */
898 void
handle_character(void)899 handle_character(void)
900 {
901 if (cflg == TRUE)
902 lstrcat(curr_line, yytext);
903
904 if (in_quote == TRUE) {
905 lstrcat(qstring_buf, yytext);
906 } else if (in_comment == FALSE) {
907 if (in_str == TRUE) {
908 if (aflg == TRUE) {
909 end_ansi_string();
910 }
911 }
912 }
913 } /* handle_character */
914
915 /*
916 * Handler for new line in input file.
917 */
918 void
handle_newline(void)919 handle_newline(void)
920 {
921 curr_linenum++;
922
923 /*
924 * in_quote is always FALSE here for ANSI-C code.
925 */
926 if ((in_comment == TRUE) ||
927 (is_last_comment_line == TRUE)) {
928 if (in_cplus_comment == TRUE) {
929 in_cplus_comment = FALSE;
930 in_comment = FALSE;
931 } else {
932 add_line_to_comment();
933 }
934 }
935
936 curr_line[0] = '\0';
937 /*
938 * C++ comment always ends with new line.
939 */
940 } /* handle_newline */
941
942 /*
943 * Process ANSI string.
944 */
945 static void
end_ansi_string(void)946 end_ansi_string(void)
947 {
948 if ((aflg == TRUE) &&
949 (in_str == TRUE) &&
950 (in_gettext == FALSE) &&
951 (in_dgettext == FALSE) &&
952 (in_dcgettext == FALSE) &&
953 (in_textdomain == FALSE)) {
954 add_str_to_element_list(FALSE, curr_domain);
955 in_str = FALSE;
956 }
957 } /* end_ansi_string */
958
959 /*
960 * Initialize global variables if necessary.
961 */
962 static void
initialize_globals(void)963 initialize_globals(void)
964 {
965 default_domain = strdup(DEFAULT_DOMAIN);
966 curr_domain[0] = '\0';
967 curr_file[0] = '\0';
968 qstring_buf[0] = '\0';
969 } /* initialize_globals() */
970
/*
 * Tells whether ch is one of the characters trim_line() strips.
 */
static int
is_trim_blank(char ch)
{
	return ((ch == ' ') || (ch == '\n') || (ch == '\t'));
}

/*
 * Extract only the string part of a line read from an exclude file by
 * removing a leading keyword ("msgid ", "msgstr " or "# "), leading and
 * trailing blanks (space, tab, newline), and one enclosing double quote
 * on either side if present.
 *
 * line is rewritten in place.  An empty line, or one consisting only of
 * blanks, becomes the empty string.
 */
static void
trim_line(char *line)
{
	size_t	first;		/* index of the first character to keep */
	size_t	end;		/* one past the last character to keep */
	size_t	i;

	/*
	 * Drop trailing blanks.  The scan stops at the start of the
	 * buffer, so a line without any other character is handled.
	 */
	end = strlen(line);
	while ((end > 0) && is_trim_blank(line[end - 1]))
		end--;

	/*
	 * Skip the keyword if there is one, then the blanks after it.
	 * The scan stops at the terminating NUL at the latest.
	 */
	if (strncmp("msgid ", line, 6) == 0) {
		first = 5;
	} else if (strncmp("msgstr ", line, 7) == 0) {
		first = 6;
	} else if (strncmp("# ", line, 2) == 0) {
		first = 2;
	} else {
		first = 0;
	}
	while (is_trim_blank(line[first]))
		first++;

	/*
	 * For backward compatibility, both double quoted and non-quoted
	 * strings are accepted.  The double quotes are removed if present.
	 */
	if (line[first] == '"')
		first++;
	if ((end > 0) && (line[end - 1] == '"'))
		end--;

	/*
	 * Move the kept part to the front.  Nothing is copied when the
	 * kept range is empty (first >= end).
	 */
	for (i = 0; first + i < end; i++)
		line[i] = line[first + i];
	line[i] = '\0';
} /* trim_line */
1043
1044 /*
1045 * Read exclude file and stores it in the global linked list.
1046 */
1047 static void
read_exclude_file(void)1048 read_exclude_file(void)
1049 {
1050 FILE *fp;
1051 struct exclude_st *tmp_excl;
1052 struct strlist_st *tail;
1053 int ignore_line;
1054 char line [MAX_STRING_LEN];
1055
1056 if ((fp = fopen(exclude_file, "r")) == NULL) {
1057 (void) fprintf(stderr, "ERROR, can't open exclude file: %s\n",
1058 exclude_file);
1059 exit(2);
1060 }
1061
1062 ignore_line = TRUE;
1063 while (fgets(line, MAX_STRING_LEN, fp) != NULL) {
1064 /*
1065 * Line starting with # is a comment line and ignored.
1066 * Blank line is ignored, too.
1067 */
1068 if ((line[0] == '\n') || (line[0] == '#')) {
1069 continue;
1070 } else if (strncmp(line, "msgstr", 6) == 0) {
1071 ignore_line = TRUE;
1072 } else if (strncmp(line, "domain", 6) == 0) {
1073 ignore_line = TRUE;
1074 } else if (strncmp(line, "msgid", 5) == 0) {
1075 ignore_line = FALSE;
1076 tmp_excl = new_exclude();
1077 tmp_excl->exstr = new_strlist();
1078 trim_line(line);
1079 tmp_excl->exstr->str = strdup(line);
1080 tail = tmp_excl->exstr;
1081 /*
1082 * Prepend new exclude string node to the list.
1083 */
1084 tmp_excl->next = excl_head;
1085 excl_head = tmp_excl;
1086 } else {
1087 /*
1088 * If more than one line of string forms msgid,
1089 * append it to the string linked list.
1090 */
1091 if (ignore_line == FALSE) {
1092 trim_line(line);
1093 tail->next = new_strlist();
1094 tail->next->str = strdup(line);
1095 tail = tail->next;
1096 }
1097 }
1098 } /* while */
1099
1100 #ifdef DEBUG
1101 tmp_excl = excl_head;
1102 while (tmp_excl != NULL) {
1103 printf("============================\n");
1104 tail = tmp_excl->exstr;
1105 while (tail != NULL) {
1106 printf("%s###\n", tail->str);
1107 tail = tail->next;
1108 }
1109 tmp_excl = tmp_excl->next;
1110 }
1111 #endif
1112 } /* read_exclude_file */
1113
1114 /*
1115 * Get next character from the string list containing ANSI style string.
1116 * This function returns three valus. (p, *m, *c).
1117 * p is returned by return value and, *m and *c are returned by changing
1118 * values in the location pointed.
1119 *
1120 * p : points node in the linked list for ANSI string.
1121 * Each node contains double quoted string.
1122 * m : The location of the next characters in the double quoted string
1123 * as integer index in the string.
1124 * When it gets to end of quoted string, the next node will be
1125 * read and m starts as zero for every new node.
1126 * c : Stores the value of the characterto be returned.
1127 */
1128 static struct strlist_st *
get_next_ch(struct strlist_st * p,int * m,char * c)1129 get_next_ch(struct strlist_st *p, int *m, char *c)
1130 {
1131 char ch, oct, hex;
1132 int value, i;
1133
1134 /*
1135 * From the string list, find non-null string first.
1136 */
1137
1138 /*CONSTCOND*/
1139 while (1) {
1140 if (p == NULL) {
1141 break;
1142 } else if (p->str == NULL) {
1143 p = p->next;
1144 } else if (p->str[*m] == '\0') {
1145 p = p->next;
1146 *m = 0;
1147 } else {
1148 break;
1149 }
1150 }
1151
1152 /*
1153 * No more character is available.
1154 */
1155 if (p == NULL) {
1156 *c = 0;
1157 return (NULL);
1158 }
1159
1160 /*
1161 * Check if the character back slash.
1162 * If yes, ANSI defined escape sequence rule is used.
1163 */
1164 if (p->str[*m] != '\\') {
1165 *c = p->str[*m];
1166 *m = *m + 1;
1167 return (p);
1168 } else {
1169 /*
1170 * Get next character after '\'.
1171 */
1172 *m = *m + 1;
1173 ch = p->str[*m];
1174 switch (ch) {
1175 case 'a':
1176 *c = '\a';
1177 break;
1178 case 'b':
1179 *c = '\b';
1180 break;
1181 case 'f':
1182 *c = '\f';
1183 break;
1184 case 'n':
1185 *c = '\n';
1186 break;
1187 case 'r':
1188 *c = '\r';
1189 break;
1190 case 't':
1191 *c = '\t';
1192 break;
1193 case 'v':
1194 *c = '\v';
1195 break;
1196 case '0':
1197 case '1':
1198 case '2':
1199 case '3':
1200 case '4':
1201 case '5':
1202 case '6':
1203 case '7':
1204 /*
1205 * Get maximum of three octal digits.
1206 */
1207 value = ch;
1208 for (i = 0; i < 2; i++) {
1209 *m = *m + 1;
1210 oct = p->str[*m];
1211 if ((oct >= '0') && (oct <= '7')) {
1212 value = value * 8 + (oct - '0');
1213 } else {
1214 *m = *m - 1;
1215 break;
1216 }
1217 }
1218 *c = value;
1219 #ifdef DEBUG
1220 /* (void) fprintf(stderr, "octal=%d\n", value); */
1221 #endif
1222 break;
1223 case 'x':
1224 value = 0;
1225 /*
1226 * Remove all heading zeros first and
1227 * get one or two valuid hexadecimal charaters.
1228 */
1229 *m = *m + 1;
1230 while (p->str[*m] == '0') {
1231 *m = *m + 1;
1232 }
1233 value = 0;
1234 for (i = 0; i < 2; i++) {
1235 hex = p->str[*m];
1236 *m = *m + 1;
1237 if (isdigit(hex)) {
1238 value = value * 16 + (hex - '0');
1239 } else if (isxdigit(hex)) {
1240 hex = tolower(hex);
1241 value = value * 16 + (hex - 'a' + 10);
1242 } else {
1243 *m = *m - 1;
1244 break;
1245 }
1246 }
1247 *c = value;
1248 #ifdef DEBUG
1249 (void) fprintf(stderr, "hex=%d\n", value);
1250 #endif
1251 *m = *m - 1;
1252 break;
1253 default :
1254 /*
1255 * Undefined by ANSI.
1256 * Just ignore "\".
1257 */
1258 *c = p->str[*m];
1259 break;
1260 }
1261 /*
1262 * Advance pointer to point the next character to be parsed.
1263 */
1264 *m = *m + 1;
1265 return (p);
1266 }
1267 } /* get_next_ch */
1268
/*
 * Compares two msgids by the character values they denote rather than
 * by the way they are spelled, so that differently written escape
 * sequences such as '\t', '\011' and '\x9' compare equal.
 * Both strings are decoded one character at a time with get_next_ch().
 * Return values follow the strcmp convention:
 *	 1	if msgid1 > msgid2
 *	 0	if msgid1 = msgid2
 *	-1	if msgid1 < msgid2
 */
static int
msgidcmp(struct strlist_st *id1, struct strlist_st *id2)
{
	char	ch1, ch2;
	int	pos1 = 0;
	int	pos2 = 0;

	for (;;) {
		id1 = get_next_ch(id1, &pos1, &ch1);
		id2 = get_next_ch(id2, &pos2, &ch2);

		/* The first differing character decides the ordering. */
		if (ch1 != ch2) {
			return ((ch1 > ch2) ? 1 : -1);
		}

		/* Equal characters: both strings ended at the same time. */
		if (ch1 == 0) {
			return (0);
		}
	}
	/*NOTREACHED*/
} /* msgidcmp */
1304
1305 /*
1306 * Check if a ANSI string (which is a linked list itself) is a duplicate
1307 * of any string in the list of ANSI string.
1308 */
1309 static int
isduplicate(struct element_st * list,struct strlist_st * str)1310 isduplicate(struct element_st *list, struct strlist_st *str)
1311 {
1312 struct element_st *p;
1313
1314 if (list == NULL) {
1315 return (FALSE);
1316 }
1317
1318 p = list;
1319 while (p != NULL) {
1320 if (p->msgid != NULL) {
1321 if (msgidcmp(p->msgid, str) == 0) {
1322 return (TRUE);
1323 }
1324 }
1325 p = p->next;
1326 }
1327
1328 return (FALSE);
1329 } /* isduplicate */
1330
1331 /*
1332 * Extract a comment line and add to the linked list containing
1333 * comment block.
1334 * Each comment line is stored in the node.
1335 */
1336 static void
add_line_to_comment(void)1337 add_line_to_comment(void)
1338 {
1339 struct strlist_st *tmp_str;
1340
1341 tmp_str = new_strlist();
1342 tmp_str->str = strdup(curr_line);
1343 tmp_str->next = NULL;
1344
1345 if (commhead == NULL) {
1346 /* Empty comment list */
1347 commhead = tmp_str;
1348 commtail = tmp_str;
1349 } else {
1350 /* append it to the list */
1351 commtail->next = tmp_str;
1352 commtail = commtail->next;
1353 }
1354
1355 is_last_comment_line = FALSE;
1356 } /* add_line_to_comment */
1357
1358 /*
1359 * Add a double quoted string to the linked list containing ANSI string.
1360 */
1361 static void
add_qstring_to_str(void)1362 add_qstring_to_str(void)
1363 {
1364 struct strlist_st *tmp_str;
1365
1366 tmp_str = new_strlist();
1367 tmp_str->str = strdup(qstring_buf);
1368 tmp_str->next = NULL;
1369
1370 if (strhead == NULL) {
1371 /* Null ANSI string */
1372 strhead = tmp_str;
1373 strtail = tmp_str;
1374 } else {
1375 /* Append it to the ANSI string linked list */
1376 strtail->next = tmp_str;
1377 strtail = strtail->next;
1378 }
1379
1380 qstring_buf[0] = '\0';
1381 } /* add_qstring_to_str */
1382
1383 /*
1384 * Finds the head of domain nodes given domain name.
1385 */
1386 static struct domain_st *
find_domain_node(char * dname)1387 find_domain_node(char *dname)
1388 {
1389 struct domain_st *tmp_dom, *p;
1390
1391 /*
1392 * If -a option is specified everything will be written to the
1393 * default domain file.
1394 */
1395 if (aflg == TRUE) {
1396 if (def_dom == NULL) {
1397 def_dom = new_domain();
1398 }
1399 return (def_dom);
1400 }
1401
1402 if ((dname == NULL) ||
1403 (dname[0] == '\0') ||
1404 (strcmp(dname, default_domain) == 0)) {
1405 if (def_dom == NULL) {
1406 def_dom = new_domain();
1407 }
1408 if (strcmp(dname, default_domain) == 0) {
1409 (void) fprintf(stderr, "%s \"%s\" is used in dgettext "
1410 "of file:%s line:%d.\n",
1411 "Warning: default domain name",
1412 default_domain, curr_file, curr_linenum);
1413 }
1414 return (def_dom);
1415 } else {
1416 p = dom_head;
1417 while (p != NULL) {
1418 if (strcmp(p->dname, dname) == 0) {
1419 return (p);
1420 }
1421 p = p->next;
1422 }
1423
1424 tmp_dom = new_domain();
1425 tmp_dom->dname = strdup(dname);
1426
1427 if (dom_head == NULL) {
1428 dom_head = tmp_dom;
1429 dom_tail = tmp_dom;
1430 } else {
1431 dom_tail->next = tmp_dom;
1432 dom_tail = dom_tail->next;
1433 }
1434 return (tmp_dom);
1435 }
1436 } /* find_domain_node */
1437
1438 /*
1439 * Frees the ANSI string linked list.
1440 */
1441 static void
free_strlist(struct strlist_st * ptr)1442 free_strlist(struct strlist_st *ptr)
1443 {
1444 struct strlist_st *p;
1445
1446 p = ptr;
1447 ptr = NULL;
1448 while (p != NULL) {
1449 ptr = p->next;
1450 free(p->str);
1451 free(p);
1452 p = ptr;
1453 }
1454 } /* free_strlist */
1455
1456 /*
1457 * Finds if a ANSI string is contained in the exclude file.
1458 */
1459 static int
isexcluded(struct strlist_st * strlist)1460 isexcluded(struct strlist_st *strlist)
1461 {
1462 struct exclude_st *p;
1463
1464 p = excl_head;
1465 while (p != NULL) {
1466 if (msgidcmp(p->exstr, strlist) == 0) {
1467 return (TRUE);
1468 }
1469 p = p->next;
1470 }
1471 return (FALSE);
1472 } /* isexcluded */
1473
1474 /*
1475 * Finds if a comment block is to be extracted.
1476 *
1477 * When -c option is specified, find out if comment block contains
1478 * comment-tag as a token separated by blanks. If it does, this
1479 * comment block is associated with the next msgid encountered.
1480 * Comment block is a linked list where each node contains one line
1481 * of comments.
1482 */
1483 static int
isextracted(struct strlist_st * strlist)1484 isextracted(struct strlist_st *strlist)
1485 {
1486 struct strlist_st *p;
1487 char *first, *pc;
1488
1489
1490 p = strlist;
1491 while (p != NULL) {
1492 first = strdup(p->str);
1493 while ((first != NULL) && (first[0] != '\0')) {
1494 pc = first;
1495
1496 /*CONSTCOND*/
1497 while (1) {
1498 if (*pc == '\0') {
1499 break;
1500 } else if ((*pc == ' ') || (*pc == '\t')) {
1501 *pc++ = '\0';
1502 break;
1503 }
1504 pc++;
1505 }
1506 if (strcmp(first, comment_tag) == 0) {
1507 return (TRUE);
1508 }
1509 first = pc;
1510 }
1511 p = p->next;
1512 } /* while */
1513
1514 /*
1515 * Not found.
1516 */
1517 return (FALSE);
1518 } /* isextracted */
1519
1520 /*
1521 * Adds ANSI string to the domain element list.
1522 */
1523 static void
add_str_to_element_list(int istextdomain,char * domain_list)1524 add_str_to_element_list(int istextdomain, char *domain_list)
1525 {
1526 struct element_st *tmp_elem;
1527 struct element_st *p, *q;
1528 struct domain_st *tmp_dom;
1529 int result;
1530
1531 /*
1532 * This can happen if something like gettext(USAGE) is used
1533 * and it is impossible to get msgid for this gettext.
1534 * Since -x option should be used in this kind of cases,
1535 * it is OK not to catch msgid.
1536 */
1537 if (strhead == NULL) {
1538 return;
1539 }
1540
1541 /*
1542 * The global variable curr_domain contains either NULL
1543 * for default_domain or domain name for dgettext().
1544 */
1545 tmp_dom = find_domain_node(domain_list);
1546
1547 /*
1548 * If this msgid is in the exclude file,
1549 * then free the linked list and return.
1550 */
1551 if ((istextdomain == FALSE) &&
1552 (isexcluded(strhead) == TRUE)) {
1553 free_strlist(strhead);
1554 strhead = strtail = NULL;
1555 return;
1556 }
1557
1558 tmp_elem = new_element();
1559 tmp_elem->msgid = strhead;
1560 tmp_elem->istextdomain = istextdomain;
1561 /*
1562 * If -c option is specified and TAG matches,
1563 * then associate the comment to the next [d]gettext() calls
1564 * encountered in the source code.
1565 * textdomain() calls will not have any effect.
1566 */
1567 if (istextdomain == FALSE) {
1568 if ((cflg == TRUE) && (commhead != NULL)) {
1569 if (isextracted(commhead) == TRUE) {
1570 tmp_elem->comment = commhead;
1571 } else {
1572 free_strlist(commhead);
1573 }
1574 commhead = commtail = NULL;
1575 }
1576 }
1577
1578 tmp_elem->linenum = linenum_saved;
1579 tmp_elem->fname = strdup(curr_file);
1580
1581
1582 if (sflg == TRUE) {
1583 /*
1584 * If this is textdomain() call and -s option is specified,
1585 * append this node to the textdomain linked list.
1586 */
1587 if (istextdomain == TRUE) {
1588 if (tmp_dom->textdomain_head == NULL) {
1589 tmp_dom->textdomain_head = tmp_elem;
1590 tmp_dom->textdomain_tail = tmp_elem;
1591 } else {
1592 tmp_dom->textdomain_tail->next = tmp_elem;
1593 tmp_dom->textdomain_tail = tmp_elem;
1594 }
1595 strhead = strtail = NULL;
1596 return;
1597 }
1598
1599 /*
1600 * Insert the node to the properly sorted position.
1601 */
1602 q = NULL;
1603 p = tmp_dom->gettext_head;
1604 while (p != NULL) {
1605 result = msgidcmp(strhead, p->msgid);
1606 if (result == 0) {
1607 /*
1608 * Duplicate id. Do not store.
1609 */
1610 free_strlist(strhead);
1611 strhead = strtail = NULL;
1612 return;
1613 } else if (result > 0) {
1614 /* move to the next node */
1615 q = p;
1616 p = p->next;
1617 } else {
1618 tmp_elem->next = p;
1619 if (q != NULL) {
1620 q->next = tmp_elem;
1621 } else {
1622 tmp_dom->gettext_head = tmp_elem;
1623 }
1624 strhead = strtail = NULL;
1625 return;
1626 }
1627 } /* while */
1628
1629 /*
1630 * New msgid is the largest or empty list.
1631 */
1632 if (q != NULL) {
1633 /* largest case */
1634 q->next = tmp_elem;
1635 } else {
1636 /* empty list */
1637 tmp_dom->gettext_head = tmp_elem;
1638 }
1639 } else {
1640 /*
1641 * Check if this msgid is already in the same domain.
1642 */
1643 if (tmp_dom != NULL) {
1644 if (isduplicate(tmp_dom->gettext_head,
1645 tmp_elem->msgid) == TRUE) {
1646 tmp_elem->isduplicate = TRUE;
1647 }
1648 }
1649 /*
1650 * If -s option is not specified, then everything
1651 * is stored in gettext linked list.
1652 */
1653 if (tmp_dom->gettext_head == NULL) {
1654 tmp_dom->gettext_head = tmp_elem;
1655 tmp_dom->gettext_tail = tmp_elem;
1656 } else {
1657 tmp_dom->gettext_tail->next = tmp_elem;
1658 tmp_dom->gettext_tail = tmp_elem;
1659 }
1660 }
1661
1662 strhead = strtail = NULL;
1663 } /* add_str_to_element_list */
1664
1665 /*
1666 * Write all domain linked list to the files.
1667 */
1668 static void
write_all_files(void)1669 write_all_files(void)
1670 {
1671 struct domain_st *tmp;
1672
1673 /*
1674 * Write out default domain file.
1675 */
1676 write_one_file(def_dom);
1677
1678 /*
1679 * If dgettext() exists and -a option is not used,
1680 * then there are non-empty linked list.
1681 */
1682 tmp = dom_head;
1683 while (tmp != NULL) {
1684 write_one_file(tmp);
1685 tmp = tmp->next;
1686 }
1687 } /* write_all_files */
1688
1689 /*
1690 * add an element_st list to the linked list.
1691 */
1692 static void
add_node_to_polist(struct element_st ** pohead,struct element_st ** potail,struct element_st * elem)1693 add_node_to_polist(struct element_st **pohead,
1694 struct element_st **potail, struct element_st *elem)
1695 {
1696 if (elem == NULL) {
1697 return;
1698 }
1699
1700 if (*pohead == NULL) {
1701 *pohead = *potail = elem;
1702 } else {
1703 (*potail)->next = elem;
1704 *potail = (*potail)->next;
1705 }
1706 } /* add_node_to_polist */
1707
1708 #define INIT_STATE 0
1709 #define IN_MSGID 1
1710 #define IN_MSGSTR 2
1711 #define IN_COMMENT 3
1712 /*
1713 * Reads existing po file into the linked list and returns the head
1714 * of the linked list.
1715 */
1716 static struct element_st *
read_po(char * fname)1717 read_po(char *fname)
1718 {
1719 struct element_st *tmp_elem = NULL;
1720 struct element_st *ehead = NULL, *etail = NULL;
1721 struct strlist_st *comment_tail = NULL;
1722 struct strlist_st *msgid_tail = NULL;
1723 struct strlist_st *msgstr_tail = NULL;
1724 int state = INIT_STATE;
1725 char line [MAX_STRING_LEN];
1726 FILE *fp;
1727
1728 if ((fp = fopen(fname, "r")) == NULL) {
1729 return (NULL);
1730 }
1731
1732 while (fgets(line, MAX_STRING_LEN, fp) != NULL) {
1733 /*
1734 * Line starting with # is a comment line and ignored.
1735 * Blank line is ignored, too.
1736 */
1737 if (line[0] == '\n') {
1738 continue;
1739 } else if (line[0] == '#') {
1740 /*
1741 * If tmp_elem is not NULL, there is msgid pair
1742 * stored. Therefore, add it.
1743 */
1744 if ((tmp_elem != NULL) && (state == IN_MSGSTR)) {
1745 add_node_to_polist(&ehead, &etail, tmp_elem);
1746 }
1747
1748 if ((state == INIT_STATE) || (state == IN_MSGSTR)) {
1749 state = IN_COMMENT;
1750 tmp_elem = new_element();
1751 tmp_elem->comment = comment_tail =
1752 new_strlist();
1753 /*
1754 * remove new line and skip "# "
1755 * in the beginning of the existing
1756 * comment line.
1757 */
1758 line[strlen(line)-1] = 0;
1759 comment_tail->str = strdup(line+2);
1760 } else if (state == IN_COMMENT) {
1761 comment_tail->next = new_strlist();
1762 comment_tail = comment_tail->next;
1763 /*
1764 * remove new line and skip "# "
1765 * in the beginning of the existing
1766 * comment line.
1767 */
1768 line[strlen(line)-1] = 0;
1769 comment_tail->str = strdup(line+2);
1770 }
1771
1772 } else if (strncmp(line, "domain", 6) == 0) {
1773 /* ignore domain line */
1774 continue;
1775 } else if (strncmp(line, "msgid", 5) == 0) {
1776 if (state == IN_MSGSTR) {
1777 add_node_to_polist(&ehead, &etail, tmp_elem);
1778 tmp_elem = new_element();
1779 } else if (state == INIT_STATE) {
1780 tmp_elem = new_element();
1781 }
1782
1783 state = IN_MSGID;
1784 trim_line(line);
1785 tmp_elem->msgid = msgid_tail = new_strlist();
1786 msgid_tail->str = strdup(line);
1787
1788 } else if (strncmp(line, "msgstr", 6) == 0) {
1789 state = IN_MSGSTR;
1790 trim_line(line);
1791 tmp_elem->msgstr = msgstr_tail = new_strlist();
1792 msgstr_tail->str = strdup(line);
1793 } else {
1794 /*
1795 * If more than one line of string forms msgid,
1796 * append it to the string linked list.
1797 */
1798 if (state == IN_MSGID) {
1799 trim_line(line);
1800 msgid_tail->next = new_strlist();
1801 msgid_tail = msgid_tail->next;
1802 msgid_tail->str = strdup(line);
1803 } else if (state == IN_MSGSTR) {
1804 trim_line(line);
1805 msgstr_tail->next = new_strlist();
1806 msgstr_tail = msgstr_tail->next;
1807 msgstr_tail->str = strdup(line);
1808 }
1809 }
1810 } /* while */
1811
1812 /*
1813 * To insert the last msgid pair.
1814 */
1815 if (tmp_elem != NULL) {
1816 add_node_to_polist(&ehead, &etail, tmp_elem);
1817 }
1818
1819 #ifdef DEBUG
1820 {
1821 struct domain_st *tmp_domain = new_domain();
1822 char tmpstr[256];
1823
1824 sprintf(tmpstr, "existing_po file : <%s>", fname);
1825 tmp_domain->dname = strdup(tmpstr);
1826 tmp_domain->gettext_head = ehead;
1827 printf("======= existing po file <%s> ========\n", fname);
1828 print_one_domain(tmp_domain);
1829 }
1830 #endif /* DEBUG */
1831
1832 (void) fclose(fp);
1833 return (ehead);
1834 } /* read_po */
1835
1836 /*
1837 * This function will append the second list to the first list.
1838 * If the msgid in the second list contains msgid in the first list,
1839 * it will be marked as duplicate.
1840 */
1841 static struct element_st *
append_list(struct element_st * l1,struct element_st * l2)1842 append_list(struct element_st *l1, struct element_st *l2)
1843 {
1844 struct element_st *p = NULL, *q = NULL, *l1_tail = NULL;
1845
1846 if (l1 == NULL)
1847 return (l2);
1848 if (l2 == NULL)
1849 return (l1);
1850
1851 /*
1852 * in this while loop, just mark isduplicate field of node in the
1853 * l2 list if the same msgid exists in l1 list.
1854 */
1855 p = l2;
1856 while (p != NULL) {
1857 q = l1;
1858 while (q != NULL) {
1859 if (msgidcmp(p->msgid, q->msgid) == 0) {
1860 p->isduplicate = TRUE;
1861 break;
1862 }
1863 q = q->next;
1864 }
1865 p = p->next;
1866 }
1867
1868 /* Now connect two linked lists. */
1869 l1_tail = l1;
1870 while (l1_tail->next != NULL) {
1871 if (l1->next == NULL)
1872 break;
1873 l1_tail = l1_tail-> next;
1874 }
1875 l1_tail->next = l2;
1876
1877 return (l1);
1878 } /* append_list */
1879
1880 /*
1881 * Writes one domain list to the file.
1882 */
1883 static void
write_one_file(struct domain_st * head)1884 write_one_file(struct domain_st *head)
1885 {
1886 FILE *fp;
1887 char fname [MAX_PATH_LEN];
1888 char dname [MAX_DOMAIN_LEN];
1889 struct element_st *p;
1890 struct element_st *existing_po_list;
1891
1892 /*
1893 * If head is NULL, then it still has to create .po file
1894 * so that it will guarantee that the previous .po file was
1895 * alwasys deleted.
1896 * This is why checking NULL pointer has been moved to after
1897 * creating .po file.
1898 */
1899
1900 /*
1901 * If domain name is NULL, it is the default domain list.
1902 * The domain name is either "messages" or specified by option -d.
1903 * The default domain name is contained in default_domain variable.
1904 */
1905 dname[0] = '\0';
1906 if ((head != NULL) &&
1907 (head->dname != NULL)) {
1908 (void) strcpy(dname, head->dname);
1909 } else {
1910 (void) strcpy(dname, default_domain);
1911 }
1912
1913 /*
1914 * path is the current directory if not specified by option -p.
1915 */
1916 fname[0] = 0;
1917 if (pflg == TRUE) {
1918 (void) strcat(fname, pathname);
1919 (void) strcat(fname, "/");
1920 }
1921 (void) strcat(fname, dname);
1922 (void) strcat(fname, ".po");
1923
1924 /*
1925 * If -j flag is specified, read exsiting .po file and
1926 * append the current list to the end of the list read from
1927 * the existing .po file.
1928 */
1929 if (jflg == TRUE) {
1930 /*
1931 * If head is NULL, we don't have to change existing file.
1932 * Therefore, just return it.
1933 */
1934 if (head == NULL) {
1935 return;
1936 }
1937 existing_po_list = read_po(fname);
1938 head->gettext_head = append_list(existing_po_list,
1939 head->gettext_head);
1940 #ifdef DEBUG
1941 if (head->dname != NULL) {
1942 printf("===after merge (-j option): <%s>===\n",
1943 head->dname);
1944 } else {
1945 printf("===after merge (-j option): <NULL>===\n");
1946 }
1947 print_one_domain(head);
1948 #endif
1949
1950 } /* if jflg */
1951
1952 if ((fp = fopen(fname, "w")) == NULL) {
1953 (void) fprintf(stderr,
1954 "ERROR, can't open output file: %s\n", fname);
1955 exit(2);
1956 }
1957
1958 (void) fprintf(fp, "domain \"%s\"\n", dname);
1959
1960 /* See comments above in the beginning of this function */
1961 if (head == NULL)
1962 return;
1963
1964 /*
1965 * There are separate storage for textdomain() calls if
1966 * -s option is used (textdomain_head linked list).
1967 * Otherwise, textdomain() is mixed with gettext(0 and dgettext().
1968 * If mixed, the boolean varaible istextdomain is used to see
1969 * if the current node contains textdomain() or [d]gettext().
1970 */
1971 if (sflg == TRUE) {
1972 p = head->textdomain_head;
1973 while (p != NULL) {
1974 /*
1975 * textdomain output line already contains
1976 * FIle name and line number information.
1977 * Therefore, does not have to check for nflg.
1978 */
1979 output_textdomain(fp, p);
1980 p = p->next;
1981 }
1982 }
1983
1984 p = head->gettext_head;
1985 while (p != NULL) {
1986
1987 /*
1988 * Comment is printed only if -c is used and
1989 * associated with gettext or dgettext.
1990 * textdomain is not associated with comments.
1991 * Changes:
1992 * comments should be extracted in case of -j option
1993 * because there are read from exising file.
1994 */
1995 if (((cflg == TRUE) || (jflg == TRUE)) &&
1996 (p->istextdomain != TRUE)) {
1997 output_comment(fp, p->comment);
1998 }
1999
2000 /*
2001 * If -n is used, then file number and line number
2002 * information is printed.
2003 * In case of textdomain(), this information is redundant
2004 * and is not printed.
2005 * If linenum is 0, it means this information has been
2006 * read from existing po file and it already contains
2007 * file and line number info as a comment line. So, it
2008 * should not printed in such case.
2009 */
2010 if ((nflg == TRUE) && (p->istextdomain == FALSE) &&
2011 (p->linenum > 0)) {
2012 (void) fprintf(fp, "# File:%s, line:%d\n",
2013 p->fname, p->linenum);
2014 }
2015
2016 /*
2017 * Depending on the type of node, output textdomain comment
2018 * or msgid.
2019 */
2020 if ((sflg == FALSE) &&
2021 (p->istextdomain == TRUE)) {
2022 output_textdomain(fp, p);
2023 } else {
2024 output_msgid(fp, p->msgid, p->isduplicate);
2025 }
2026 p = p->next;
2027
2028 } /* while */
2029
2030 (void) fclose(fp);
2031 } /* write_one_file */
2032
2033 /*
2034 * Prints out textdomain call as a comment line with file name and
2035 * the line number information.
2036 */
2037 static void
output_textdomain(FILE * fp,struct element_st * p)2038 output_textdomain(FILE *fp, struct element_st *p)
2039 {
2040
2041 if (p == NULL)
2042 return;
2043
2044 /*
2045 * Write textdomain() line as a comment.
2046 */
2047 (void) fprintf(fp, "# File:%s, line:%d, textdomain(\"%s\");\n",
2048 p->fname, p->linenum, p->msgid->str);
2049 } /* output_textdomain */
2050
2051 /*
2052 * Prints out comments from linked list.
2053 */
2054 static void
output_comment(FILE * fp,struct strlist_st * p)2055 output_comment(FILE *fp, struct strlist_st *p)
2056 {
2057 if (p == NULL)
2058 return;
2059
2060 /*
2061 * Write comment section.
2062 */
2063 while (p != NULL) {
2064 (void) fprintf(fp, "# %s\n", p->str);
2065 p = p->next;
2066 }
2067 } /* output_comment */
2068
2069 /*
2070 * Prints out msgid along with msgstr.
2071 */
2072 static void
output_msgid(FILE * fp,struct strlist_st * p,int duplicate)2073 output_msgid(FILE *fp, struct strlist_st *p, int duplicate)
2074 {
2075 struct strlist_st *q;
2076
2077 if (p == NULL)
2078 return;
2079
2080 /*
2081 * Write msgid section.
2082 * If duplciate flag is ON, prepend "# " in front of every line
2083 * so that they are considered as comment lines in .po file.
2084 */
2085 if (duplicate == TRUE) {
2086 (void) fprintf(fp, "# ");
2087 }
2088 (void) fprintf(fp, "msgid \"%s\"\n", p->str);
2089 q = p->next;
2090 while (q != NULL) {
2091 if (duplicate == TRUE) {
2092 (void) fprintf(fp, "# ");
2093 }
2094 (void) fprintf(fp, " \"%s\"\n", q->str);
2095 q = q->next;
2096 }
2097
2098 /*
2099 * Write msgstr section.
2100 * if -M option is specified, append <suffix> to msgid.
2101 * if -m option is specified, prepend <prefix> to msgid.
2102 */
2103 if (duplicate == TRUE) {
2104 (void) fprintf(fp, "# ");
2105 }
2106 if ((mflg == TRUE) || (Mflg == TRUE)) {
2107 if (mflg == TRUE) {
2108 /*
2109 * If single line msgid, add suffix to the same line
2110 */
2111 if ((Mflg == TRUE) && (p->next == NULL)) {
2112 /* -M and -m and single line case */
2113 (void) fprintf(fp, "msgstr \"%s%s%s\"\n",
2114 prefix, p->str, suffix);
2115 } else {
2116 /* -M and -m and multi line case */
2117 (void) fprintf(fp, "msgstr \"%s%s\"\n",
2118 prefix, p->str);
2119 }
2120 } else {
2121 if ((Mflg == TRUE) && (p->next == NULL)) {
2122 /* -M only with single line case */
2123 (void) fprintf(fp, "msgstr \"%s%s\"\n",
2124 p->str, suffix);
2125 } else {
2126 /* -M only with multi line case */
2127 (void) fprintf(fp, "msgstr \"%s\"\n", p->str);
2128 }
2129 }
2130 q = p->next;
2131 while (q != NULL) {
2132 if (duplicate == TRUE) {
2133 (void) fprintf(fp, "# ");
2134 }
2135 (void) fprintf(fp, " \"%s\"\n", q->str);
2136 q = q->next;
2137 }
2138 /*
2139 * If multi line msgid, add suffix after the last line.
2140 */
2141 if ((Mflg == TRUE) && (p->next != NULL) &&
2142 (suffix[0] != '\0')) {
2143 (void) fprintf(fp, " \"%s\"\n", suffix);
2144 }
2145 } else {
2146 (void) fprintf(fp, "msgstr\n");
2147 }
2148 } /* output_msgid */
2149
2150 /*
2151 * Malloc a new element node and initialize fields.
2152 */
2153 static struct element_st *
new_element(void)2154 new_element(void)
2155 {
2156 struct element_st *tmp;
2157
2158 tmp = (struct element_st *)malloc(sizeof (struct element_st));
2159 tmp->istextdomain = FALSE;
2160 tmp->isduplicate = FALSE;
2161 tmp->msgid = NULL;
2162 tmp->msgstr = NULL;
2163 tmp->comment = NULL;
2164 tmp->fname = NULL;
2165 tmp->linenum = 0;
2166 tmp->next = NULL;
2167
2168 return (tmp);
2169 } /* new_element */
2170
2171 /*
2172 * Malloc a new domain node and initialize fields.
2173 */
2174 static struct domain_st *
new_domain(void)2175 new_domain(void)
2176 {
2177 struct domain_st *tmp;
2178
2179 tmp = (struct domain_st *)malloc(sizeof (struct domain_st));
2180 tmp->dname = NULL;
2181 tmp->gettext_head = NULL;
2182 tmp->gettext_tail = NULL;
2183 tmp->textdomain_head = NULL;
2184 tmp->textdomain_tail = NULL;
2185 tmp->next = NULL;
2186
2187 return (tmp);
2188 } /* new_domain */
2189
2190 /*
2191 * Malloc a new string list node and initialize fields.
2192 */
2193 static struct strlist_st *
new_strlist(void)2194 new_strlist(void)
2195 {
2196 struct strlist_st *tmp;
2197
2198 tmp = (struct strlist_st *)malloc(sizeof (struct strlist_st));
2199 tmp->str = NULL;
2200 tmp->next = NULL;
2201
2202 return (tmp);
2203 } /* new_strlist */
2204
2205 /*
2206 * Malloc a new exclude string list node and initialize fields.
2207 */
2208 static struct exclude_st *
new_exclude(void)2209 new_exclude(void)
2210 {
2211 struct exclude_st *tmp;
2212
2213 tmp = (struct exclude_st *)malloc(sizeof (struct exclude_st));
2214 tmp->exstr = NULL;
2215 tmp->next = NULL;
2216
2217 return (tmp);
2218 } /* new_exclude */
2219
2220 /*
2221 * Local version of strcat to keep within maximum string size.
2222 */
2223 static void
lstrcat(char * s1,const char * s2)2224 lstrcat(char *s1, const char *s2)
2225 {
2226 char *es1 = &s1[MAX_STRING_LEN];
2227 char *ss1 = s1;
2228
2229 while (*s1++)
2230 ;
2231 --s1;
2232 while (*s1++ = *s2++)
2233 if (s1 >= es1) {
2234 s1[-1] = '\0';
2235 if ((in_comment == TRUE || in_quote == TRUE) &&
2236 (warn_linenum != curr_linenum)) {
2237 if (stdin_only == FALSE) {
2238 (void) fprintf(stderr,
2239 "WARNING: file %s line %d exceeds "\
2240 "%d characters: \"%15.15s\"\n",
2241 curr_file, curr_linenum,
2242 MAX_STRING_LEN, ss1);
2243 } else {
2244 (void) fprintf(stderr,
2245 "WARNING: line %d exceeds "\
2246 "%d characters: \"%15.15s\"\n",
2247 curr_linenum, MAX_STRING_LEN, ss1);
2248 }
2249 warn_linenum = curr_linenum;
2250 }
2251 break;
2252 }
2253 } /* lstrcat */
2254
2255 #ifdef DEBUG
2256 /*
2257 * Debug print routine. Compiled only with DEBUG on.
2258 */
2259 void
print_element_list(struct element_st * q)2260 print_element_list(struct element_st *q)
2261 {
2262 struct strlist_st *r;
2263
2264 while (q != NULL) {
2265 printf(" istextdomain = %d\n", q->istextdomain);
2266 printf(" isduplicate = %d\n", q->isduplicate);
2267 if ((q->msgid != NULL) && (q->msgid->str != NULL)) {
2268 printf(" msgid = <%s>\n", q->msgid->str);
2269 r = q->msgid->next;
2270 while (r != NULL) {
2271 printf(" <%s>\n", r->str);
2272 r = r->next;
2273 }
2274 } else {
2275 printf(" msgid = <NULL>\n");
2276 }
2277 if ((q->msgstr != NULL) && (q->msgstr->str != NULL)) {
2278 printf(" msgstr= <%s>\n", q->msgstr->str);
2279 r = q->msgstr->next;
2280 while (r != NULL) {
2281 printf(" <%s>\n", r->str);
2282 r = r->next;
2283 }
2284 } else {
2285 printf(" msgstr= <NULL>\n");
2286 }
2287
2288 if (q->comment == NULL) {
2289 printf(" comment = <NULL>\n");
2290 } else {
2291 printf(" comment = <%s>\n", q->comment->str);
2292 r = q->comment->next;
2293 while (r != NULL) {
2294 printf(" <%s>\n", r->str);
2295 r = r->next;
2296 }
2297 }
2298
2299 if (q->fname == NULL) {
2300 printf(" fname = <NULL>\n");
2301 } else {
2302 printf(" fname = <%s>\n", q->fname);
2303 }
2304 printf(" linenum = %d\n", q->linenum);
2305 printf("\n");
2306 q = q->next;
2307 }
2308 }
2309
2310 /*
2311 * Debug print routine. Compiled only with DEBUG on.
2312 */
2313 void
print_one_domain(struct domain_st * p)2314 print_one_domain(struct domain_st *p)
2315 {
2316 struct element_st *q;
2317
2318 if (p == NULL) {
2319 printf("domain pointer = <NULL>\n");
2320 return;
2321 } else if (p->dname == NULL) {
2322 printf("domain_name = <%s>\n", "<NULL>");
2323 } else {
2324 printf("domain_name = <%s>\n", p->dname);
2325 }
2326 q = p->gettext_head;
2327 print_element_list(q);
2328
2329 q = p->textdomain_head;
2330 print_element_list(q);
2331 } /* print_one_domain */
2332
2333 void
print_all_domain(struct domain_st * dom_list)2334 print_all_domain(struct domain_st *dom_list)
2335 {
2336 struct domain_st *p;
2337 struct element_st *q;
2338
2339 p = dom_list;
2340 while (p != NULL) {
2341 print_one_domain(p);
2342 p = p->next;
2343 } /* while */
2344 } /* print_all_domain */
2345 #endif
2346