1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 1991, 1999, 2001-2002 Sun Microsystems, Inc.
24 * All rights reserved.
25 * Use is subject to license terms.
26 */
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #define TRUE 1
35 #define FALSE 0
36 #define MAX_PATH_LEN 1024
37 #define MAX_DOMAIN_LEN 1024
38 #define MAX_STRING_LEN 2048
39
40 #define USAGE "Usage: xgettext [-a [-x exclude-file]] [-jns]\
41 [-c comment-tag]\n [-d default-domain] [-m prefix] \
42 [-M suffix] [-p pathname] files ...\n\
43 xgettext -h\n"
44
45 #define DEFAULT_DOMAIN "messages"
46
47 extern char yytext[];
48 extern int yylex(void);
49
/*
 * Contains a list of strings to be used to store ANSI-C style string.
 * Each quoted string is stored in one node.
 */
struct strlist_st {
	char *str;			/* text held by this node */
	struct strlist_st *next;	/* following node; NULL ends the list */
};
58
/*
 * One extracted message (or textdomain() call).
 *
 * istextdomain	: Boolean telling if this node contains textdomain call.
 * isduplicate	: Boolean telling if this node duplicate of any other msgid.
 * msgid	: contains msgid or textdomain if istextdomain is true.
 * msgstr	: contains msgstr.
 * comment	: comment extracted in case of -c option.
 * fname	: tells which file contains msgid.
 * linenum	: line number in the file.
 * next		: Next node.
 */
struct element_st {
	char istextdomain;		/* TRUE for a textdomain() entry */
	char isduplicate;		/* TRUE if the msgid was seen before */
	struct strlist_st *msgid;	/* msgid (or domain name) as a list */
	struct strlist_st *msgstr;	/* msgstr as a string list */
	struct strlist_st *comment;	/* comment lines collected with -c */
	char *fname;			/* file containing the msgid */
	int linenum;			/* line number within fname */
	struct element_st *next;	/* next element in the list */
};
79
/*
 * dname	   : domain name. NULL if default domain.
 * gettext_head	   : Head of linked list containing [d]gettext().
 * gettext_tail	   : Tail of linked list containing [d]gettext().
 * textdomain_head : Head of linked list containing textdomain().
 * textdomain_tail : Tail of linked list containing textdomain().
 * next		   : Next node.
 *
 * Each domain contains two linked lists
 * (gettext_head, textdomain_head).
 * If -s option is used, then textdomain_head contains all
 * textdomain() calls and no textdomain() calls are stored in gettext_head.
 * If -s option is not used, textdomain_head is an empty list and
 * gettext_head contains all gettext(), dgettext(), and textdomain() calls.
 */
struct domain_st {
	char *dname;				/* NULL for the default domain */
	struct element_st *gettext_head;	/* first [d]gettext() element */
	struct element_st *gettext_tail;	/* last [d]gettext() element */
	struct element_st *textdomain_head;	/* first textdomain() element */
	struct element_st *textdomain_tail;	/* last textdomain() element */
	struct domain_st *next;			/* next domain */
};
103
104 /*
105 * There are two domain linked lists.
106 * def_dom contains default domain linked list and
 * dom_head contains all other domain linked lists to be created by
108 * dgettext() calls.
109 */
110 static struct domain_st *def_dom = NULL;
111 static struct domain_st *dom_head = NULL;
112 static struct domain_st *dom_tail = NULL;
113
/*
 * This linked list contains a list of strings to be excluded when
 * -x option is used.
 * read_exclude_file() builds it, prepending one node per msgid entry.
 */
static struct exclude_st {
	struct strlist_st *exstr;	/* excluded msgid, one node per line */
	struct exclude_st *next;	/* next excluded msgid */
} *excl_head;
122
123 /*
124 * All option flags and values for each option if any.
125 */
126 static int aflg = FALSE;
127 static int cflg = FALSE;
128 static char *comment_tag = NULL;
129 static char *default_domain = NULL;
130 static int hflg = FALSE;
131 static int jflg = FALSE;
132 static int mflg = FALSE;
133 static int Mflg = FALSE;
134 static char *suffix = NULL;
135 static char *prefix = NULL;
136 static int nflg = FALSE;
137 static int pflg = FALSE;
138 static char *pathname = NULL;
139 static int sflg = FALSE;
140 static int tflg = FALSE; /* Undocumented option to extract dcgettext */
141 static int xflg = FALSE;
142 static char *exclude_file = NULL;
143
144 /*
145 * Each variable shows the current state of parsing input file.
146 *
147 * in_comment : Means inside comment block (C or C++).
148 * in_cplus_comment : Means inside C++ comment block.
149 * in_gettext : Means inside gettext call.
150 * in_dgettext : Means inside dgettext call.
151 * in_dcgettext : Means inside dcgettext call.
152 * in_textdomain : Means inside textdomain call.
153 * in_str : Means currently processing ANSI style string.
154 * in_quote : Means currently processing double quoted string.
155 * in_skippable_string : Means currently processing double quoted string,
156 * that occurs outside a call to gettext, dgettext,
157 * dcgettext, textdomain, with -a not specified.
158 * is_last_comment_line : Means the current line is the last line
159 * of the comment block. This is necessary because
160 * in_comment becomes FALSE when '* /' is encountered.
161 * is_first_comma_found : This is used only for dcgettext because dcgettext()
162 * requires 2 commas. So need to do different action
163 * depending on which commas encountered.
164 * num_nested_open_paren : This keeps track of the number of open parens to
165 * handle dcgettext ((const char *)0,"msg",LC_TIME);
166 */
167 static int in_comment = FALSE;
168 static int in_cplus_comment = FALSE;
169 static int in_gettext = FALSE;
170 static int in_dgettext = FALSE;
171 static int in_dcgettext = FALSE;
172 static int in_textdomain = FALSE;
173 static int in_str = FALSE;
174 static int in_quote = FALSE;
175 static int is_last_comment_line = FALSE;
176 static int is_first_comma_found = FALSE;
177 static int in_skippable_string = FALSE;
178 static int num_nested_open_paren = 0;
179
180 /*
181 * This variable contains the first line of gettext(), dgettext(), or
182 * textdomain() calls.
183 * This is necessary for multiple lines of a single call to store
184 * the starting line.
185 */
186 static int linenum_saved = 0;
187
188 int stdin_only = FALSE; /* Read input from stdin */
189
190 /*
191 * curr_file : Contains current file name processed.
192 * curr_domain : Contains the current domain for each dgettext().
193 * This is NULL for gettext().
194 * curr_line : Contains the current line processed.
195 * qstring_buf : Contains the double quoted string processed.
196 * curr_linenum : Line number being processed in the current input file.
197 * warn_linenum : Line number of current warning message.
198 */
199 char curr_file[MAX_PATH_LEN];
200 static char curr_domain[MAX_DOMAIN_LEN];
201 static char curr_line[MAX_STRING_LEN];
202 static char qstring_buf[MAX_STRING_LEN];
203 int curr_linenum = 1;
204 int warn_linenum = 0;
205
206 /*
207 * strhead : This list contains ANSI style string.
208 * Each node contains double quoted string.
209 * strtail : This is the tail of strhead.
210 * commhead : This list contains comments string.
211 * Each node contains one line of comment.
212 * commtail : This is the tail of commhead.
213 */
214 static struct strlist_st *strhead = NULL;
215 static struct strlist_st *strtail = NULL;
216 static struct strlist_st *commhead = NULL;
217 static struct strlist_st *commtail = NULL;
218
219 /*
220 * gargc : Same as argc. Used to pass argc to lex routine.
 * gargv : Same as argv. Used to pass argv to lex routine.
222 */
223 int gargc;
224 char **gargv;
225
226 static void add_line_to_comment(void);
227 static void add_qstring_to_str(void);
228 static void add_str_to_element_list(int, char *);
229 static void copy_strlist_to_str(char *, struct strlist_st *);
230 static void end_ansi_string(void);
231 static void free_strlist(struct strlist_st *);
232 void handle_newline(void);
233 static void initialize_globals(void);
234 static void output_comment(FILE *, struct strlist_st *);
235 static void output_msgid(FILE *, struct strlist_st *, int);
236 static void output_textdomain(FILE *, struct element_st *);
237 static void print_help(void);
238 static void read_exclude_file(void);
239 static void trim_line(char *);
240 static void write_all_files(void);
241 static void write_one_file(struct domain_st *);
242
243 static void lstrcat(char *, const char *);
244
245 /*
246 * Utility functions to malloc a node and initialize fields.
247 */
248 static struct domain_st *new_domain(void);
249 static struct strlist_st *new_strlist(void);
250 static struct element_st *new_element(void);
251 static struct exclude_st *new_exclude(void);
252
253 /*
254 * Main program of xgettext.
255 */
256 int
main(int argc,char ** argv)257 main(int argc, char **argv)
258 {
259 int opterr = FALSE;
260 int c;
261
262 initialize_globals();
263
264 while ((c = getopt(argc, argv, "jhax:nsc:d:m:M:p:t")) != EOF) {
265 switch (c) {
266 case 'a':
267 aflg = TRUE;
268 break;
269 case 'c':
270 cflg = TRUE;
271 comment_tag = optarg;
272 break;
273 case 'd':
274 default_domain = optarg;
275 break;
276 case 'h':
277 hflg = TRUE;
278 break;
279 case 'j':
280 jflg = TRUE;
281 break;
282 case 'M':
283 Mflg = TRUE;
284 suffix = optarg;
285 break;
286 case 'm':
287 mflg = TRUE;
288 prefix = optarg;
289 break;
290 case 'n':
291 nflg = TRUE;
292 break;
293 case 'p':
294 pflg = TRUE;
295 pathname = optarg;
296 break;
297 case 's':
298 sflg = TRUE;
299 break;
300 case 't':
301 tflg = TRUE;
302 break;
303 case 'x':
304 xflg = TRUE;
305 exclude_file = optarg;
306 break;
307 case '?':
308 opterr = TRUE;
309 break;
310 }
311 }
312
313 /* if -h is used, ignore all other options. */
314 if (hflg == TRUE) {
315 (void) fprintf(stderr, USAGE);
316 print_help();
317 exit(0);
318 }
319
320 /* -x can be used only with -a */
321 if ((xflg == TRUE) && (aflg == FALSE))
322 opterr = TRUE;
323
324 /* -j cannot be used with -a */
325 if ((jflg == TRUE) && (aflg == TRUE)) {
326 (void) fprintf(stderr,
327 "-a and -j options cannot be used together.\n");
328 opterr = TRUE;
329 }
330
331 /* -j cannot be used with -s */
332 if ((jflg == TRUE) && (sflg == TRUE)) {
333 (void) fprintf(stderr,
334 "-j and -s options cannot be used together.\n");
335 opterr = TRUE;
336 }
337
338 if (opterr == TRUE) {
339 (void) fprintf(stderr, USAGE);
340 exit(2);
341 }
342
343 /* error, if no files are specified. */
344 if (optind == argc) {
345 (void) fprintf(stderr, USAGE);
346 exit(2);
347 }
348
349 if (xflg == TRUE) {
350 read_exclude_file();
351 }
352
353 /* If files are -, then read from stdin */
354 if (argv[optind][0] == '-') {
355 stdin_only = TRUE;
356 optind++;
357 } else {
358 stdin_only = FALSE;
359 }
360
361 /* Store argc and argv to pass to yylex() */
362 gargc = argc;
363 gargv = argv;
364
365 #ifdef DEBUG
366 (void) printf("optind=%d\n", optind);
367 {
368 int i = optind;
369 for (; i < argc; i++) {
370 (void) printf(" %d, <%s>\n", i, argv[i]);
371 }
372 }
373 #endif
374
375 if (stdin_only == FALSE) {
376 if (freopen(argv[optind], "r", stdin) == NULL) {
377 (void) fprintf(stderr,
378 "ERROR, can't open input file: %s\n", argv[optind]);
379 exit(2);
380 }
381 (void) strcpy(curr_file, gargv[optind]);
382 optind++;
383 }
384
385 /*
386 * Process input.
387 */
388 (void) yylex();
389
390 #ifdef DEBUG
391 printf("\n======= default_domain ========\n");
392 print_one_domain(def_dom);
393 printf("======= domain list ========\n");
394 print_all_domain(dom_head);
395 #endif
396
397 /*
398 * Write out all .po files.
399 */
400 write_all_files();
401
402 return (0);
403 } /* main */
404
/*
 * Writes the one-line description of every option to stderr,
 * preceded by an empty line.
 */
static void
print_help(void)
{
	static const char *const help_lines[] = {
		"\n",
		"-a\t\t\tfind ALL strings\n",
		"-c <comment-tag>\tget comments containing <flag>\n",
		"-d <default-domain>\tuse <default-domain> for default domain\n",
		"-h\t\t\tHelp\n",
		"-j\t\t\tupdate existing file with the current result\n",
		"-M <suffix>\t\tfill in msgstr with msgid<suffix>\n",
		"-m <prefix>\t\tfill in msgstr with <prefix>msgid\n",
		"-n\t\t\tline# file name and line number info in output\n",
		"-p <pathname>\t\tuse <pathname> for output file directory\n",
		"-s\t\t\tgenerate sorted output files\n",
		"-x <exclude-file>\texclude strings in file <exclude-file> from output\n",
		"-\t\t\tread stdin, use as a filter (input only)\n"
	};
	size_t	idx;
	size_t	count = sizeof (help_lines) / sizeof (help_lines[0]);

	for (idx = 0; idx < count; idx++) {
		(void) fputs(help_lines[idx], stderr);
	}
} /* print_help */
437
438 /*
439 * Extract file name and line number information from macro line
440 * and set the global variable accordingly.
441 * The valid line format is
442 * 1) # nnn
443 * or
444 * 2) # nnn "xxxxx"
445 * where nnn is line number and xxxxx is file name.
446 */
447 static void
extract_filename_linenumber(char * mline)448 extract_filename_linenumber(char *mline)
449 {
450 int num;
451 char *p, *q, *r;
452
453 /*
454 * mline can contain multi newline.
455 * line number should be increased by the number of newlines.
456 */
457 p = mline;
458 while ((p = strchr(p, '\n')) != NULL) {
459 p++;
460 curr_linenum++;
461 }
462 p = strchr(mline, ' ');
463 if (p == NULL)
464 return;
465 q = strchr(++p, ' ');
466 if (q == NULL) {
467 /* case 1 */
468 if ((num = atoi(p)) > 0) {
469 curr_linenum = num;
470 return;
471 }
472 } else {
473 /* case 2 */
474 *q++ = 0;
475 if (*q == '"') {
476 q++;
477 r = strchr(q, '"');
478 if (r == NULL) {
479 return;
480 }
481 *r = 0;
482 if ((num = atoi(p)) > 0) {
483 curr_linenum = num;
484 (void) strcpy(curr_file, q);
485 }
486 }
487 }
488 } /* extract_filename_linenumber */
489
490 /*
491 * Handler for MACRO line which starts with #.
492 */
493 void
handle_macro_line(void)494 handle_macro_line(void)
495 {
496 #ifdef DEBUG
497 (void) printf("Macro line=<%s>\n", yytext);
498 #endif
499 if (cflg == TRUE)
500 lstrcat(curr_line, yytext);
501
502 if (in_quote == TRUE) {
503 lstrcat(qstring_buf, yytext);
504 } else if (in_comment == FALSE) {
505 extract_filename_linenumber(yytext);
506 }
507
508 curr_linenum--;
509 handle_newline();
510 } /* handle_macro_line */
511
512 /*
513 * Handler for C++ comments which starts with //.
514 */
515 void
handle_cplus_comment_line(void)516 handle_cplus_comment_line(void)
517 {
518 if (cflg == TRUE)
519 lstrcat(curr_line, yytext);
520
521 if (in_quote == TRUE) {
522 lstrcat(qstring_buf, yytext);
523 } else if ((in_comment == FALSE) &&
524 (in_skippable_string == FALSE)) {
525
526 /*
527 * If already in c comments, don't do anything.
528 * Set both flags to TRUE here.
529 * Both flags will be set to FALSE when newline
530 * encounters.
531 */
532 in_cplus_comment = TRUE;
533 in_comment = TRUE;
534 }
535 } /* handle_cplus_comment_line */
536
537 /*
538 * Handler for the comment start (slash asterisk) in input file.
539 */
540 void
handle_open_comment(void)541 handle_open_comment(void)
542 {
543 if (cflg == TRUE)
544 lstrcat(curr_line, yytext);
545
546 if (in_quote == TRUE) {
547 lstrcat(qstring_buf, yytext);
548 } else if ((in_comment == FALSE) &&
549 (in_skippable_string == FALSE)) {
550
551 in_comment = TRUE;
552 is_last_comment_line = FALSE;
553 /*
554 * If there is any comment extracted before accidently,
555 * clean it up and start the new comment again.
556 */
557 free_strlist(commhead);
558 commhead = commtail = NULL;
559 }
560 }
561
562 /*
563 * Handler for the comment end (asterisk slash) in input file.
564 */
565 void
handle_close_comment(void)566 handle_close_comment(void)
567 {
568 if (cflg == TRUE)
569 lstrcat(curr_line, yytext);
570
571 if (in_quote == TRUE) {
572 lstrcat(qstring_buf, yytext);
573 } else if (in_skippable_string == FALSE) {
574 in_comment = FALSE;
575 is_last_comment_line = TRUE;
576 }
577 }
578
579 /*
580 * Handler for "gettext" in input file.
581 */
582 void
handle_gettext(void)583 handle_gettext(void)
584 {
585 /*
586 * If -t option is specified to extrct dcgettext,
587 * don't do anything for gettext().
588 */
589 if (tflg == TRUE) {
590 return;
591 }
592
593 num_nested_open_paren = 0;
594
595 if (cflg == TRUE)
596 lstrcat(curr_line, yytext);
597
598 if (in_quote == TRUE) {
599 lstrcat(qstring_buf, yytext);
600 } else if (in_comment == FALSE) {
601 in_gettext = TRUE;
602 linenum_saved = curr_linenum;
603 /*
604 * gettext will be put into default domain .po file
605 * curr_domain does not change for gettext.
606 */
607 curr_domain[0] = NULL;
608 }
609 } /* handle_gettext */
610
611 /*
612 * Handler for "dgettext" in input file.
613 */
614 void
handle_dgettext(void)615 handle_dgettext(void)
616 {
617 /*
618 * If -t option is specified to extrct dcgettext,
619 * don't do anything for dgettext().
620 */
621 if (tflg == TRUE) {
622 return;
623 }
624
625 num_nested_open_paren = 0;
626
627 if (cflg == TRUE)
628 lstrcat(curr_line, yytext);
629
630 if (in_quote == TRUE) {
631 lstrcat(qstring_buf, yytext);
632 } else if (in_comment == FALSE) {
633 in_dgettext = TRUE;
634 linenum_saved = curr_linenum;
635 /*
636 * dgettext will be put into domain file specified.
637 * curr_domain will follow.
638 */
639 curr_domain[0] = NULL;
640 }
641 } /* handle_dgettext */
642
643 /*
644 * Handler for "dcgettext" in input file.
645 */
646 void
handle_dcgettext(void)647 handle_dcgettext(void)
648 {
649 /*
650 * dcgettext will be extracted only when -t flag is specified.
651 */
652 if (tflg == FALSE) {
653 return;
654 }
655
656 num_nested_open_paren = 0;
657
658 is_first_comma_found = FALSE;
659
660 if (cflg == TRUE)
661 lstrcat(curr_line, yytext);
662
663 if (in_quote == TRUE) {
664 lstrcat(qstring_buf, yytext);
665 } else if (in_comment == FALSE) {
666 in_dcgettext = TRUE;
667 linenum_saved = curr_linenum;
668 /*
669 * dcgettext will be put into domain file specified.
670 * curr_domain will follow.
671 */
672 curr_domain[0] = NULL;
673 }
674 } /* handle_dcgettext */
675
676 /*
677 * Handler for "textdomain" in input file.
678 */
679 void
handle_textdomain(void)680 handle_textdomain(void)
681 {
682 if (cflg == TRUE)
683 lstrcat(curr_line, yytext);
684
685 if (in_quote == TRUE) {
686 lstrcat(qstring_buf, yytext);
687 } else if (in_comment == FALSE) {
688 in_textdomain = TRUE;
689 linenum_saved = curr_linenum;
690 curr_domain[0] = NULL;
691 }
692 } /* handle_textdomain */
693
694 /*
695 * Handler for '(' in input file.
696 */
697 void
handle_open_paren(void)698 handle_open_paren(void)
699 {
700 if (cflg == TRUE)
701 lstrcat(curr_line, yytext);
702
703 if (in_quote == TRUE) {
704 lstrcat(qstring_buf, yytext);
705 } else if (in_comment == FALSE) {
706 if ((in_gettext == TRUE) ||
707 (in_dgettext == TRUE) ||
708 (in_dcgettext == TRUE) ||
709 (in_textdomain == TRUE)) {
710 in_str = TRUE;
711 num_nested_open_paren++;
712 }
713 }
714 } /* handle_open_paren */
715
716 /*
717 * Handler for ')' in input file.
718 */
719 void
handle_close_paren(void)720 handle_close_paren(void)
721 {
722 if (cflg == TRUE)
723 lstrcat(curr_line, yytext);
724
725 if (in_quote == TRUE) {
726 lstrcat(qstring_buf, yytext);
727 } else if (in_comment == FALSE) {
728 if ((in_gettext == TRUE) ||
729 (in_dgettext == TRUE) ||
730 (in_dcgettext == TRUE) ||
731 (in_textdomain == TRUE)) {
732 /*
733 * If this is not the matching close paren with
734 * the first open paren, no action is necessary.
735 */
736 if (--num_nested_open_paren > 0)
737 return;
738 add_str_to_element_list(in_textdomain, curr_domain);
739 in_str = FALSE;
740 in_gettext = FALSE;
741 in_dgettext = FALSE;
742 in_dcgettext = FALSE;
743 in_textdomain = FALSE;
744 } else if (aflg == TRUE) {
745 end_ansi_string();
746 }
747 }
748 } /* handle_close_paren */
749
750 /*
751 * Handler for '\\n' in input file.
752 *
753 * This is a '\' followed by new line.
754 * This can be treated like a new line except when this is a continuation
755 * of a ANSI-C string.
756 * If this is a part of ANSI string, treat the current line as a double
757 * quoted string and the next line is the start of the double quoted
758 * string.
759 */
760 void
handle_esc_newline(void)761 handle_esc_newline(void)
762 {
763 if (cflg == TRUE)
764 lstrcat(curr_line, "\\");
765
766 curr_linenum++;
767
768 if (in_quote == TRUE) {
769 add_qstring_to_str();
770 } else if ((in_comment == TRUE) ||
771 (is_last_comment_line == TRUE)) {
772 if (in_cplus_comment == FALSE) {
773 add_line_to_comment();
774 }
775 }
776
777 curr_line[0] = NULL;
778 } /* handle_esc_newline */
779
780 /*
781 * Handler for '"' in input file.
782 */
783 void
handle_quote(void)784 handle_quote(void)
785 {
786 if (cflg == TRUE)
787 lstrcat(curr_line, yytext);
788
789 if (in_comment == TRUE) {
790 /*EMPTY*/
791 } else if ((in_gettext == TRUE) ||
792 (in_dgettext == TRUE) ||
793 (in_dcgettext == TRUE) ||
794 (in_textdomain == TRUE)) {
795 if (in_str == TRUE) {
796 if (in_quote == FALSE) {
797 in_quote = TRUE;
798 } else {
799 add_qstring_to_str();
800 in_quote = FALSE;
801 }
802 }
803 } else if (aflg == TRUE) {
804 /*
805 * The quote is found outside of gettext, dgetext, and
806 * textdomain. Everytime a quoted string is found,
807 * add it to the string list.
808 * in_str stays TRUE until ANSI string ends.
809 */
810 if (in_str == TRUE) {
811 if (in_quote == TRUE) {
812 in_quote = FALSE;
813 add_qstring_to_str();
814 } else {
815 in_quote = TRUE;
816 }
817 } else {
818 in_str = TRUE;
819 in_quote = TRUE;
820 linenum_saved = curr_linenum;
821 }
822 } else {
823 in_skippable_string = (in_skippable_string == TRUE) ?
824 FALSE : TRUE;
825 }
826 } /* handle_quote */
827
828 /*
829 * Handler for ' ' or TAB in input file.
830 */
831 void
handle_spaces(void)832 handle_spaces(void)
833 {
834 if (cflg == TRUE)
835 lstrcat(curr_line, yytext);
836
837 if (in_quote == TRUE) {
838 lstrcat(qstring_buf, yytext);
839 }
840 } /* handle_spaces */
841
842 /*
843 * Flattens a linked list containing ANSI string to the one string.
844 */
845 static void
copy_strlist_to_str(char * str,struct strlist_st * strlist)846 copy_strlist_to_str(char *str, struct strlist_st *strlist)
847 {
848 struct strlist_st *p;
849
850 str[0] = NULL;
851
852 if (strlist != NULL) {
853 p = strlist;
854 while (p != NULL) {
855 if (p->str != NULL) {
856 lstrcat(str, p->str);
857 }
858 p = p->next;
859 }
860 }
861 } /* copy_strlist_to_str */
862
863 /*
864 * Handler for ',' in input file.
865 */
866 void
handle_comma(void)867 handle_comma(void)
868 {
869 if (cflg == TRUE)
870 lstrcat(curr_line, yytext);
871
872 if (in_quote == TRUE) {
873 lstrcat(qstring_buf, yytext);
874 } else if (in_comment == FALSE) {
875 if (in_str == TRUE) {
876 if (in_dgettext == TRUE) {
877 copy_strlist_to_str(curr_domain, strhead);
878 free_strlist(strhead);
879 strhead = strtail = NULL;
880 } else if (in_dcgettext == TRUE) {
881 /*
882 * Ignore the second comma.
883 */
884 if (is_first_comma_found == FALSE) {
885 copy_strlist_to_str(curr_domain,
886 strhead);
887 free_strlist(strhead);
888 strhead = strtail = NULL;
889 is_first_comma_found = TRUE;
890 }
891 } else if (aflg == TRUE) {
892 end_ansi_string();
893 }
894 }
895 }
896 } /* handle_comma */
897
898 /*
899 * Handler for any other character that does not have special handler.
900 */
901 void
handle_character(void)902 handle_character(void)
903 {
904 if (cflg == TRUE)
905 lstrcat(curr_line, yytext);
906
907 if (in_quote == TRUE) {
908 lstrcat(qstring_buf, yytext);
909 } else if (in_comment == FALSE) {
910 if (in_str == TRUE) {
911 if (aflg == TRUE) {
912 end_ansi_string();
913 }
914 }
915 }
916 } /* handle_character */
917
918 /*
919 * Handler for new line in input file.
920 */
921 void
handle_newline(void)922 handle_newline(void)
923 {
924 curr_linenum++;
925
926 /*
927 * in_quote is always FALSE here for ANSI-C code.
928 */
929 if ((in_comment == TRUE) ||
930 (is_last_comment_line == TRUE)) {
931 if (in_cplus_comment == TRUE) {
932 in_cplus_comment = FALSE;
933 in_comment = FALSE;
934 } else {
935 add_line_to_comment();
936 }
937 }
938
939 curr_line[0] = NULL;
940 /*
941 * C++ comment always ends with new line.
942 */
943 } /* handle_newline */
944
945 /*
946 * Process ANSI string.
947 */
948 static void
end_ansi_string(void)949 end_ansi_string(void)
950 {
951 if ((aflg == TRUE) &&
952 (in_str == TRUE) &&
953 (in_gettext == FALSE) &&
954 (in_dgettext == FALSE) &&
955 (in_dcgettext == FALSE) &&
956 (in_textdomain == FALSE)) {
957 add_str_to_element_list(FALSE, curr_domain);
958 in_str = FALSE;
959 }
960 } /* end_ansi_string */
961
962 /*
963 * Initialize global variables if necessary.
964 */
965 static void
initialize_globals(void)966 initialize_globals(void)
967 {
968 default_domain = strdup(DEFAULT_DOMAIN);
969 curr_domain[0] = NULL;
970 curr_file[0] = NULL;
971 qstring_buf[0] = NULL;
972 } /* initialize_globals() */
973
/*
 * Extract only string part when read a exclude file by removing
 * keywords (e.g. msgid, msgstr, # ) and heading and trailing blanks and
 * double quotes.
 *
 * line is rewritten in place.  Blanks are space, TAB and newline.
 * At most one leading and one trailing double quote are removed.
 * An empty line, or one with nothing left after trimming, becomes the
 * empty string.
 */
static void
trim_line(char *line)
{
	size_t	first;	/* index of the first character to keep */
	size_t	end;	/* index one past the last character to keep */

	/*
	 * Drop trailing blanks.  The scan is bounded by the start of the
	 * line so that an empty or all-blank line is never read before
	 * its first character.
	 */
	end = strlen(line);
	while ((end > 0) &&
	    ((line[end - 1] == ' ') || (line[end - 1] == '\n') ||
	    (line[end - 1] == '\t'))) {
		end--;
	}

	/*
	 * Skip the leading keyword, if any.  The match includes the blank
	 * that follows the keyword, so the start index is inside the line.
	 */
	if (strncmp("msgid ", line, 6) == 0) {
		first = 5;
	} else if (strncmp("msgstr ", line, 7) == 0) {
		first = 6;
	} else if (strncmp("# ", line, 2) == 0) {
		first = 2;
	} else {
		first = 0;
	}

	/* Skip leading blanks; the string terminator stops the scan. */
	while ((line[first] == ' ') || (line[first] == '\n') ||
	    (line[first] == '\t')) {
		first++;
	}

	/*
	 * For Backward compatibility, we consider both double quoted
	 * string and non-quoted string.
	 * The double quote is removed before being stored if exists.
	 */
	if (line[first] == '"') {
		first++;
	}
	if ((end > 0) && (line[end - 1] == '"')) {
		end--;
	}

	/*
	 * Now move the valid part of the string to the front.
	 */
	if (first >= end) {
		line[0] = '\0';
		return;
	}
	(void) memmove(line, line + first, end - first);
	line[end - first] = '\0';
} /* trim_line */
1046
1047 /*
1048 * Read exclude file and stores it in the global linked list.
1049 */
1050 static void
read_exclude_file(void)1051 read_exclude_file(void)
1052 {
1053 FILE *fp;
1054 struct exclude_st *tmp_excl;
1055 struct strlist_st *tail;
1056 int ignore_line;
1057 char line [MAX_STRING_LEN];
1058
1059 if ((fp = fopen(exclude_file, "r")) == NULL) {
1060 (void) fprintf(stderr, "ERROR, can't open exclude file: %s\n",
1061 exclude_file);
1062 exit(2);
1063 }
1064
1065 ignore_line = TRUE;
1066 while (fgets(line, MAX_STRING_LEN, fp) != NULL) {
1067 /*
1068 * Line starting with # is a comment line and ignored.
1069 * Blank line is ignored, too.
1070 */
1071 if ((line[0] == '\n') || (line[0] == '#')) {
1072 continue;
1073 } else if (strncmp(line, "msgstr", 6) == 0) {
1074 ignore_line = TRUE;
1075 } else if (strncmp(line, "domain", 6) == 0) {
1076 ignore_line = TRUE;
1077 } else if (strncmp(line, "msgid", 5) == 0) {
1078 ignore_line = FALSE;
1079 tmp_excl = new_exclude();
1080 tmp_excl->exstr = new_strlist();
1081 trim_line(line);
1082 tmp_excl->exstr->str = strdup(line);
1083 tail = tmp_excl->exstr;
1084 /*
1085 * Prepend new exclude string node to the list.
1086 */
1087 tmp_excl->next = excl_head;
1088 excl_head = tmp_excl;
1089 } else {
1090 /*
1091 * If more than one line of string forms msgid,
1092 * append it to the string linked list.
1093 */
1094 if (ignore_line == FALSE) {
1095 trim_line(line);
1096 tail->next = new_strlist();
1097 tail->next->str = strdup(line);
1098 tail = tail->next;
1099 }
1100 }
1101 } /* while */
1102
1103 #ifdef DEBUG
1104 tmp_excl = excl_head;
1105 while (tmp_excl != NULL) {
1106 printf("============================\n");
1107 tail = tmp_excl->exstr;
1108 while (tail != NULL) {
1109 printf("%s###\n", tail->str);
1110 tail = tail->next;
1111 }
1112 tmp_excl = tmp_excl->next;
1113 }
1114 #endif
1115 } /* read_exclude_file */
1116
1117 /*
1118 * Get next character from the string list containing ANSI style string.
1119 * This function returns three valus. (p, *m, *c).
1120 * p is returned by return value and, *m and *c are returned by changing
1121 * values in the location pointed.
1122 *
1123 * p : points node in the linked list for ANSI string.
1124 * Each node contains double quoted string.
1125 * m : The location of the next characters in the double quoted string
1126 * as integer index in the string.
1127 * When it gets to end of quoted string, the next node will be
1128 * read and m starts as zero for every new node.
1129 * c : Stores the value of the characterto be returned.
1130 */
1131 static struct strlist_st *
get_next_ch(struct strlist_st * p,int * m,char * c)1132 get_next_ch(struct strlist_st *p, int *m, char *c)
1133 {
1134 char ch, oct, hex;
1135 int value, i;
1136
1137 /*
1138 * From the string list, find non-null string first.
1139 */
1140
1141 /*CONSTCOND*/
1142 while (1) {
1143 if (p == NULL) {
1144 break;
1145 } else if (p->str == NULL) {
1146 p = p->next;
1147 } else if (p->str[*m] == NULL) {
1148 p = p->next;
1149 *m = 0;
1150 } else {
1151 break;
1152 }
1153 }
1154
1155 /*
1156 * No more character is available.
1157 */
1158 if (p == NULL) {
1159 *c = 0;
1160 return (NULL);
1161 }
1162
1163 /*
1164 * Check if the character back slash.
1165 * If yes, ANSI defined escape sequence rule is used.
1166 */
1167 if (p->str[*m] != '\\') {
1168 *c = p->str[*m];
1169 *m = *m + 1;
1170 return (p);
1171 } else {
1172 /*
1173 * Get next character after '\'.
1174 */
1175 *m = *m + 1;
1176 ch = p->str[*m];
1177 switch (ch) {
1178 case 'a':
1179 *c = '\a';
1180 break;
1181 case 'b':
1182 *c = '\b';
1183 break;
1184 case 'f':
1185 *c = '\f';
1186 break;
1187 case 'n':
1188 *c = '\n';
1189 break;
1190 case 'r':
1191 *c = '\r';
1192 break;
1193 case 't':
1194 *c = '\t';
1195 break;
1196 case 'v':
1197 *c = '\v';
1198 break;
1199 case '0':
1200 case '1':
1201 case '2':
1202 case '3':
1203 case '4':
1204 case '5':
1205 case '6':
1206 case '7':
1207 /*
1208 * Get maximum of three octal digits.
1209 */
1210 value = ch;
1211 for (i = 0; i < 2; i++) {
1212 *m = *m + 1;
1213 oct = p->str[*m];
1214 if ((oct >= '0') && (oct <= '7')) {
1215 value = value * 8 + (oct - '0');
1216 } else {
1217 *m = *m - 1;
1218 break;
1219 }
1220 }
1221 *c = value;
1222 #ifdef DEBUG
1223 /* (void) fprintf(stderr, "octal=%d\n", value); */
1224 #endif
1225 break;
1226 case 'x':
1227 value = 0;
1228 /*
1229 * Remove all heading zeros first and
1230 * get one or two valuid hexadecimal charaters.
1231 */
1232 *m = *m + 1;
1233 while (p->str[*m] == '0') {
1234 *m = *m + 1;
1235 }
1236 value = 0;
1237 for (i = 0; i < 2; i++) {
1238 hex = p->str[*m];
1239 *m = *m + 1;
1240 if (isdigit(hex)) {
1241 value = value * 16 + (hex - '0');
1242 } else if (isxdigit(hex)) {
1243 hex = tolower(hex);
1244 value = value * 16 + (hex - 'a' + 10);
1245 } else {
1246 *m = *m - 1;
1247 break;
1248 }
1249 }
1250 *c = value;
1251 #ifdef DEBUG
1252 (void) fprintf(stderr, "hex=%d\n", value);
1253 #endif
1254 *m = *m - 1;
1255 break;
1256 default :
1257 /*
1258 * Undefined by ANSI.
1259 * Just ignore "\".
1260 */
1261 *c = p->str[*m];
1262 break;
1263 }
1264 /*
1265 * Advance pointer to point the next character to be parsed.
1266 */
1267 *m = *m + 1;
1268 return (p);
1269 }
1270 } /* get_next_ch */
1271
/*
 * Compare two msgids by the values their characters denote rather than
 * by the way they are spelled.  Both lists are decoded one character at
 * a time with get_next_ch(), so an escape sequence and the character it
 * stands for (e.g. '\t' and '\x9') compare equal.
 *
 * Returns:
 *	 1 if msgid1 > msgid2
 *	 0 if msgid1 = msgid2
 *	-1 if msgid1 < msgid2
 */
static int
msgidcmp(struct strlist_st *id1, struct strlist_st *id2)
{
	int	pos1 = 0;
	int	pos2 = 0;
	char	ch1, ch2;

	for (;;) {
		id1 = get_next_ch(id1, &pos1, &ch1);
		id2 = get_next_ch(id2, &pos2, &ch2);

		/* The first differing value decides the ordering. */
		if (ch1 != ch2) {
			return ((ch1 > ch2) ? 1 : -1);
		}

		/* Equal values: a 0 here means both msgids ended together. */
		if (ch1 == 0) {
			return (0);
		}
	}
	/*NOTREACHED*/
} /* msgidcmp */
1307
1308 /*
1309 * Check if a ANSI string (which is a linked list itself) is a duplicate
1310 * of any string in the list of ANSI string.
1311 */
1312 static int
isduplicate(struct element_st * list,struct strlist_st * str)1313 isduplicate(struct element_st *list, struct strlist_st *str)
1314 {
1315 struct element_st *p;
1316
1317 if (list == NULL) {
1318 return (FALSE);
1319 }
1320
1321 p = list;
1322 while (p != NULL) {
1323 if (p->msgid != NULL) {
1324 if (msgidcmp(p->msgid, str) == 0) {
1325 return (TRUE);
1326 }
1327 }
1328 p = p->next;
1329 }
1330
1331 return (FALSE);
1332 } /* isduplicate */
1333
1334 /*
1335 * Extract a comment line and add to the linked list containing
1336 * comment block.
1337 * Each comment line is stored in the node.
1338 */
1339 static void
add_line_to_comment(void)1340 add_line_to_comment(void)
1341 {
1342 struct strlist_st *tmp_str;
1343
1344 tmp_str = new_strlist();
1345 tmp_str->str = strdup(curr_line);
1346 tmp_str->next = NULL;
1347
1348 if (commhead == NULL) {
1349 /* Empty comment list */
1350 commhead = tmp_str;
1351 commtail = tmp_str;
1352 } else {
1353 /* append it to the list */
1354 commtail->next = tmp_str;
1355 commtail = commtail->next;
1356 }
1357
1358 is_last_comment_line = FALSE;
1359 } /* add_line_to_comment */
1360
1361 /*
1362 * Add a double quoted string to the linked list containing ANSI string.
1363 */
1364 static void
add_qstring_to_str(void)1365 add_qstring_to_str(void)
1366 {
1367 struct strlist_st *tmp_str;
1368
1369 tmp_str = new_strlist();
1370 tmp_str->str = strdup(qstring_buf);
1371 tmp_str->next = NULL;
1372
1373 if (strhead == NULL) {
1374 /* Null ANSI string */
1375 strhead = tmp_str;
1376 strtail = tmp_str;
1377 } else {
1378 /* Append it to the ANSI string linked list */
1379 strtail->next = tmp_str;
1380 strtail = strtail->next;
1381 }
1382
1383 qstring_buf[0] = NULL;
1384 } /* add_qstring_to_str */
1385
1386 /*
1387 * Finds the head of domain nodes given domain name.
1388 */
1389 static struct domain_st *
find_domain_node(char * dname)1390 find_domain_node(char *dname)
1391 {
1392 struct domain_st *tmp_dom, *p;
1393
1394 /*
1395 * If -a option is specified everything will be written to the
1396 * default domain file.
1397 */
1398 if (aflg == TRUE) {
1399 if (def_dom == NULL) {
1400 def_dom = new_domain();
1401 }
1402 return (def_dom);
1403 }
1404
1405 if ((dname == NULL) ||
1406 (dname[0] == NULL) ||
1407 (strcmp(dname, default_domain) == 0)) {
1408 if (def_dom == NULL) {
1409 def_dom = new_domain();
1410 }
1411 if (strcmp(dname, default_domain) == 0) {
1412 (void) fprintf(stderr,
1413 "%s \"%s\" is used in dgettext of file:%s line:%d.\n",
1414 "Warning: default domain name",
1415 default_domain, curr_file, curr_linenum);
1416 }
1417 return (def_dom);
1418 } else {
1419 p = dom_head;
1420 while (p != NULL) {
1421 if (strcmp(p->dname, dname) == 0) {
1422 return (p);
1423 }
1424 p = p->next;
1425 }
1426
1427 tmp_dom = new_domain();
1428 tmp_dom->dname = strdup(dname);
1429
1430 if (dom_head == NULL) {
1431 dom_head = tmp_dom;
1432 dom_tail = tmp_dom;
1433 } else {
1434 dom_tail->next = tmp_dom;
1435 dom_tail = dom_tail->next;
1436 }
1437 return (tmp_dom);
1438 }
1439 } /* find_domain_node */
1440
1441 /*
1442 * Frees the ANSI string linked list.
1443 */
1444 static void
free_strlist(struct strlist_st * ptr)1445 free_strlist(struct strlist_st *ptr)
1446 {
1447 struct strlist_st *p;
1448
1449 p = ptr;
1450 ptr = NULL;
1451 while (p != NULL) {
1452 ptr = p->next;
1453 free(p->str);
1454 free(p);
1455 p = ptr;
1456 }
1457 } /* free_strlist */
1458
1459 /*
1460 * Finds if a ANSI string is contained in the exclude file.
1461 */
1462 static int
isexcluded(struct strlist_st * strlist)1463 isexcluded(struct strlist_st *strlist)
1464 {
1465 struct exclude_st *p;
1466
1467 p = excl_head;
1468 while (p != NULL) {
1469 if (msgidcmp(p->exstr, strlist) == 0) {
1470 return (TRUE);
1471 }
1472 p = p->next;
1473 }
1474 return (FALSE);
1475 } /* isexcluded */
1476
1477 /*
1478 * Finds if a comment block is to be extracted.
1479 *
1480 * When -c option is specified, find out if comment block contains
1481 * comment-tag as a token separated by blanks. If it does, this
1482 * comment block is associated with the next msgid encountered.
1483 * Comment block is a linked list where each node contains one line
1484 * of comments.
1485 */
1486 static int
isextracted(struct strlist_st * strlist)1487 isextracted(struct strlist_st *strlist)
1488 {
1489 struct strlist_st *p;
1490 char *first, *pc;
1491
1492
1493 p = strlist;
1494 while (p != NULL) {
1495 first = strdup(p->str);
1496 while ((first != NULL) && (first[0] != NULL)) {
1497 pc = first;
1498
1499 /*CONSTCOND*/
1500 while (1) {
1501 if (*pc == NULL) {
1502 break;
1503 } else if ((*pc == ' ') || (*pc == '\t')) {
1504 *pc++ = NULL;
1505 break;
1506 }
1507 pc++;
1508 }
1509 if (strcmp(first, comment_tag) == 0) {
1510 return (TRUE);
1511 }
1512 first = pc;
1513 }
1514 p = p->next;
1515 } /* while */
1516
1517 /*
1518 * Not found.
1519 */
1520 return (FALSE);
1521 } /* isextracted */
1522
1523 /*
1524 * Adds ANSI string to the domain element list.
1525 */
1526 static void
add_str_to_element_list(int istextdomain,char * domain_list)1527 add_str_to_element_list(int istextdomain, char *domain_list)
1528 {
1529 struct element_st *tmp_elem;
1530 struct element_st *p, *q;
1531 struct domain_st *tmp_dom;
1532 int result;
1533
1534 /*
1535 * This can happen if something like gettext(USAGE) is used
1536 * and it is impossible to get msgid for this gettext.
1537 * Since -x option should be used in this kind of cases,
1538 * it is OK not to catch msgid.
1539 */
1540 if (strhead == NULL) {
1541 return;
1542 }
1543
1544 /*
1545 * The global variable curr_domain contains either NULL
1546 * for default_domain or domain name for dgettext().
1547 */
1548 tmp_dom = find_domain_node(domain_list);
1549
1550 /*
1551 * If this msgid is in the exclude file,
1552 * then free the linked list and return.
1553 */
1554 if ((istextdomain == FALSE) &&
1555 (isexcluded(strhead) == TRUE)) {
1556 free_strlist(strhead);
1557 strhead = strtail = NULL;
1558 return;
1559 }
1560
1561 tmp_elem = new_element();
1562 tmp_elem->msgid = strhead;
1563 tmp_elem->istextdomain = istextdomain;
1564 /*
1565 * If -c option is specified and TAG matches,
1566 * then associate the comment to the next [d]gettext() calls
1567 * encountered in the source code.
1568 * textdomain() calls will not have any effect.
1569 */
1570 if (istextdomain == FALSE) {
1571 if ((cflg == TRUE) && (commhead != NULL)) {
1572 if (isextracted(commhead) == TRUE) {
1573 tmp_elem->comment = commhead;
1574 } else {
1575 free_strlist(commhead);
1576 }
1577 commhead = commtail = NULL;
1578 }
1579 }
1580
1581 tmp_elem->linenum = linenum_saved;
1582 tmp_elem->fname = strdup(curr_file);
1583
1584
1585 if (sflg == TRUE) {
1586 /*
1587 * If this is textdomain() call and -s option is specified,
1588 * append this node to the textdomain linked list.
1589 */
1590 if (istextdomain == TRUE) {
1591 if (tmp_dom->textdomain_head == NULL) {
1592 tmp_dom->textdomain_head = tmp_elem;
1593 tmp_dom->textdomain_tail = tmp_elem;
1594 } else {
1595 tmp_dom->textdomain_tail->next = tmp_elem;
1596 tmp_dom->textdomain_tail = tmp_elem;
1597 }
1598 strhead = strtail = NULL;
1599 return;
1600 }
1601
1602 /*
1603 * Insert the node to the properly sorted position.
1604 */
1605 q = NULL;
1606 p = tmp_dom->gettext_head;
1607 while (p != NULL) {
1608 result = msgidcmp(strhead, p->msgid);
1609 if (result == 0) {
1610 /*
1611 * Duplicate id. Do not store.
1612 */
1613 free_strlist(strhead);
1614 strhead = strtail = NULL;
1615 return;
1616 } else if (result > 0) {
1617 /* move to the next node */
1618 q = p;
1619 p = p->next;
1620 } else {
1621 tmp_elem->next = p;
1622 if (q != NULL) {
1623 q->next = tmp_elem;
1624 } else {
1625 tmp_dom->gettext_head = tmp_elem;
1626 }
1627 strhead = strtail = NULL;
1628 return;
1629 }
1630 } /* while */
1631
1632 /*
1633 * New msgid is the largest or empty list.
1634 */
1635 if (q != NULL) {
1636 /* largest case */
1637 q->next = tmp_elem;
1638 } else {
1639 /* empty list */
1640 tmp_dom->gettext_head = tmp_elem;
1641 }
1642 } else {
1643 /*
1644 * Check if this msgid is already in the same domain.
1645 */
1646 if (tmp_dom != NULL) {
1647 if (isduplicate(tmp_dom->gettext_head,
1648 tmp_elem->msgid) == TRUE) {
1649 tmp_elem->isduplicate = TRUE;
1650 }
1651 }
1652 /*
1653 * If -s option is not specified, then everything
1654 * is stored in gettext linked list.
1655 */
1656 if (tmp_dom->gettext_head == NULL) {
1657 tmp_dom->gettext_head = tmp_elem;
1658 tmp_dom->gettext_tail = tmp_elem;
1659 } else {
1660 tmp_dom->gettext_tail->next = tmp_elem;
1661 tmp_dom->gettext_tail = tmp_elem;
1662 }
1663 }
1664
1665 strhead = strtail = NULL;
1666 } /* add_str_to_element_list */
1667
1668 /*
1669 * Write all domain linked list to the files.
1670 */
1671 static void
write_all_files(void)1672 write_all_files(void)
1673 {
1674 struct domain_st *tmp;
1675
1676 /*
1677 * Write out default domain file.
1678 */
1679 write_one_file(def_dom);
1680
1681 /*
1682 * If dgettext() exists and -a option is not used,
1683 * then there are non-empty linked list.
1684 */
1685 tmp = dom_head;
1686 while (tmp != NULL) {
1687 write_one_file(tmp);
1688 tmp = tmp->next;
1689 }
1690 } /* write_all_files */
1691
1692 /*
1693 * add an element_st list to the linked list.
1694 */
1695 static void
add_node_to_polist(struct element_st ** pohead,struct element_st ** potail,struct element_st * elem)1696 add_node_to_polist(struct element_st **pohead,
1697 struct element_st **potail, struct element_st *elem)
1698 {
1699 if (elem == NULL) {
1700 return;
1701 }
1702
1703 if (*pohead == NULL) {
1704 *pohead = *potail = elem;
1705 } else {
1706 (*potail)->next = elem;
1707 *potail = (*potail)->next;
1708 }
1709 } /* add_node_to_polist */
1710
1711 #define INIT_STATE 0
1712 #define IN_MSGID 1
1713 #define IN_MSGSTR 2
1714 #define IN_COMMENT 3
1715 /*
1716 * Reads existing po file into the linked list and returns the head
1717 * of the linked list.
1718 */
1719 static struct element_st *
read_po(char * fname)1720 read_po(char *fname)
1721 {
1722 struct element_st *tmp_elem = NULL;
1723 struct element_st *ehead = NULL, *etail = NULL;
1724 struct strlist_st *comment_tail = NULL;
1725 struct strlist_st *msgid_tail = NULL;
1726 struct strlist_st *msgstr_tail = NULL;
1727 int state = INIT_STATE;
1728 char line [MAX_STRING_LEN];
1729 FILE *fp;
1730
1731 if ((fp = fopen(fname, "r")) == NULL) {
1732 return (NULL);
1733 }
1734
1735 while (fgets(line, MAX_STRING_LEN, fp) != NULL) {
1736 /*
1737 * Line starting with # is a comment line and ignored.
1738 * Blank line is ignored, too.
1739 */
1740 if (line[0] == '\n') {
1741 continue;
1742 } else if (line[0] == '#') {
1743 /*
1744 * If tmp_elem is not NULL, there is msgid pair
1745 * stored. Therefore, add it.
1746 */
1747 if ((tmp_elem != NULL) && (state == IN_MSGSTR)) {
1748 add_node_to_polist(&ehead, &etail, tmp_elem);
1749 }
1750
1751 if ((state == INIT_STATE) || (state == IN_MSGSTR)) {
1752 state = IN_COMMENT;
1753 tmp_elem = new_element();
1754 tmp_elem->comment = comment_tail =
1755 new_strlist();
1756 /*
1757 * remove new line and skip "# "
1758 * in the beginning of the existing
1759 * comment line.
1760 */
1761 line[strlen(line)-1] = 0;
1762 comment_tail->str = strdup(line+2);
1763 } else if (state == IN_COMMENT) {
1764 comment_tail->next = new_strlist();
1765 comment_tail = comment_tail->next;
1766 /*
1767 * remove new line and skip "# "
1768 * in the beginning of the existing
1769 * comment line.
1770 */
1771 line[strlen(line)-1] = 0;
1772 comment_tail->str = strdup(line+2);
1773 }
1774
1775 } else if (strncmp(line, "domain", 6) == 0) {
1776 /* ignore domain line */
1777 continue;
1778 } else if (strncmp(line, "msgid", 5) == 0) {
1779 if (state == IN_MSGSTR) {
1780 add_node_to_polist(&ehead, &etail, tmp_elem);
1781 tmp_elem = new_element();
1782 } else if (state == INIT_STATE) {
1783 tmp_elem = new_element();
1784 }
1785
1786 state = IN_MSGID;
1787 trim_line(line);
1788 tmp_elem->msgid = msgid_tail = new_strlist();
1789 msgid_tail->str = strdup(line);
1790
1791 } else if (strncmp(line, "msgstr", 6) == 0) {
1792 state = IN_MSGSTR;
1793 trim_line(line);
1794 tmp_elem->msgstr = msgstr_tail = new_strlist();
1795 msgstr_tail->str = strdup(line);
1796 } else {
1797 /*
1798 * If more than one line of string forms msgid,
1799 * append it to the string linked list.
1800 */
1801 if (state == IN_MSGID) {
1802 trim_line(line);
1803 msgid_tail->next = new_strlist();
1804 msgid_tail = msgid_tail->next;
1805 msgid_tail->str = strdup(line);
1806 } else if (state == IN_MSGSTR) {
1807 trim_line(line);
1808 msgstr_tail->next = new_strlist();
1809 msgstr_tail = msgstr_tail->next;
1810 msgstr_tail->str = strdup(line);
1811 }
1812 }
1813 } /* while */
1814
1815 /*
1816 * To insert the last msgid pair.
1817 */
1818 if (tmp_elem != NULL) {
1819 add_node_to_polist(&ehead, &etail, tmp_elem);
1820 }
1821
1822 #ifdef DEBUG
1823 {
1824 struct domain_st *tmp_domain = new_domain();
1825 char tmpstr[256];
1826
1827 sprintf(tmpstr, "existing_po file : <%s>", fname);
1828 tmp_domain->dname = strdup(tmpstr);
1829 tmp_domain->gettext_head = ehead;
1830 printf("======= existing po file <%s> ========\n", fname);
1831 print_one_domain(tmp_domain);
1832 }
1833 #endif /* DEBUG */
1834
1835 (void) fclose(fp);
1836 return (ehead);
1837 } /* read_po */
1838
1839 /*
1840 * This function will append the second list to the first list.
1841 * If the msgid in the second list contains msgid in the first list,
1842 * it will be marked as duplicate.
1843 */
1844 static struct element_st *
append_list(struct element_st * l1,struct element_st * l2)1845 append_list(struct element_st *l1, struct element_st *l2)
1846 {
1847 struct element_st *p = NULL, *q = NULL, *l1_tail = NULL;
1848
1849 if (l1 == NULL)
1850 return (l2);
1851 if (l2 == NULL)
1852 return (l1);
1853
1854 /*
1855 * in this while loop, just mark isduplicate field of node in the
1856 * l2 list if the same msgid exists in l1 list.
1857 */
1858 p = l2;
1859 while (p != NULL) {
1860 q = l1;
1861 while (q != NULL) {
1862 if (msgidcmp(p->msgid, q->msgid) == 0) {
1863 p->isduplicate = TRUE;
1864 break;
1865 }
1866 q = q->next;
1867 }
1868 p = p->next;
1869 }
1870
1871 /* Now connect two linked lists. */
1872 l1_tail = l1;
1873 while (l1_tail->next != NULL) {
1874 if (l1->next == NULL)
1875 break;
1876 l1_tail = l1_tail-> next;
1877 }
1878 l1_tail->next = l2;
1879
1880 return (l1);
1881 } /* append_list */
1882
1883 /*
1884 * Writes one domain list to the file.
1885 */
1886 static void
write_one_file(struct domain_st * head)1887 write_one_file(struct domain_st *head)
1888 {
1889 FILE *fp;
1890 char fname [MAX_PATH_LEN];
1891 char dname [MAX_DOMAIN_LEN];
1892 struct element_st *p;
1893 struct element_st *existing_po_list;
1894
1895 /*
1896 * If head is NULL, then it still has to create .po file
1897 * so that it will guarantee that the previous .po file was
1898 * alwasys deleted.
1899 * This is why checking NULL pointer has been moved to after
1900 * creating .po file.
1901 */
1902
1903 /*
1904 * If domain name is NULL, it is the default domain list.
1905 * The domain name is either "messages" or specified by option -d.
1906 * The default domain name is contained in default_domain variable.
1907 */
1908 dname[0] = NULL;
1909 if ((head != NULL) &&
1910 (head->dname != NULL)) {
1911 (void) strcpy(dname, head->dname);
1912 } else {
1913 (void) strcpy(dname, default_domain);
1914 }
1915
1916 /*
1917 * path is the current directory if not specified by option -p.
1918 */
1919 fname[0] = 0;
1920 if (pflg == TRUE) {
1921 (void) strcat(fname, pathname);
1922 (void) strcat(fname, "/");
1923 }
1924 (void) strcat(fname, dname);
1925 (void) strcat(fname, ".po");
1926
1927 /*
1928 * If -j flag is specified, read exsiting .po file and
1929 * append the current list to the end of the list read from
1930 * the existing .po file.
1931 */
1932 if (jflg == TRUE) {
1933 /*
1934 * If head is NULL, we don't have to change existing file.
1935 * Therefore, just return it.
1936 */
1937 if (head == NULL) {
1938 return;
1939 }
1940 existing_po_list = read_po(fname);
1941 head->gettext_head = append_list(existing_po_list,
1942 head->gettext_head);
1943 #ifdef DEBUG
1944 if (head->dname != NULL) {
1945 printf("===after merge (-j option): <%s>===\n",
1946 head->dname);
1947 } else {
1948 printf("===after merge (-j option): <NULL>===\n");
1949 }
1950 print_one_domain(head);
1951 #endif
1952
1953 } /* if jflg */
1954
1955 if ((fp = fopen(fname, "w")) == NULL) {
1956 (void) fprintf(stderr,
1957 "ERROR, can't open output file: %s\n", fname);
1958 exit(2);
1959 }
1960
1961 (void) fprintf(fp, "domain \"%s\"\n", dname);
1962
1963 /* See comments above in the beginning of this function */
1964 if (head == NULL)
1965 return;
1966
1967 /*
1968 * There are separate storage for textdomain() calls if
1969 * -s option is used (textdomain_head linked list).
1970 * Otherwise, textdomain() is mixed with gettext(0 and dgettext().
1971 * If mixed, the boolean varaible istextdomain is used to see
1972 * if the current node contains textdomain() or [d]gettext().
1973 */
1974 if (sflg == TRUE) {
1975 p = head->textdomain_head;
1976 while (p != NULL) {
1977 /*
1978 * textdomain output line already contains
1979 * FIle name and line number information.
1980 * Therefore, does not have to check for nflg.
1981 */
1982 output_textdomain(fp, p);
1983 p = p->next;
1984 }
1985 }
1986
1987 p = head->gettext_head;
1988 while (p != NULL) {
1989
1990 /*
1991 * Comment is printed only if -c is used and
1992 * associated with gettext or dgettext.
1993 * textdomain is not associated with comments.
1994 * Changes:
1995 * comments should be extracted in case of -j option
1996 * because there are read from exising file.
1997 */
1998 if (((cflg == TRUE) || (jflg == TRUE)) &&
1999 (p->istextdomain != TRUE)) {
2000 output_comment(fp, p->comment);
2001 }
2002
2003 /*
2004 * If -n is used, then file number and line number
2005 * information is printed.
2006 * In case of textdomain(), this information is redundant
2007 * and is not printed.
2008 * If linenum is 0, it means this information has been
2009 * read from existing po file and it already contains
2010 * file and line number info as a comment line. So, it
2011 * should not printed in such case.
2012 */
2013 if ((nflg == TRUE) && (p->istextdomain == FALSE) &&
2014 (p->linenum > 0)) {
2015 (void) fprintf(fp, "# File:%s, line:%d\n",
2016 p->fname, p->linenum);
2017 }
2018
2019 /*
2020 * Depending on the type of node, output textdomain comment
2021 * or msgid.
2022 */
2023 if ((sflg == FALSE) &&
2024 (p->istextdomain == TRUE)) {
2025 output_textdomain(fp, p);
2026 } else {
2027 output_msgid(fp, p->msgid, p->isduplicate);
2028 }
2029 p = p->next;
2030
2031 } /* while */
2032
2033 (void) fclose(fp);
2034 } /* write_one_file */
2035
2036 /*
2037 * Prints out textdomain call as a comment line with file name and
2038 * the line number information.
2039 */
2040 static void
output_textdomain(FILE * fp,struct element_st * p)2041 output_textdomain(FILE *fp, struct element_st *p)
2042 {
2043
2044 if (p == NULL)
2045 return;
2046
2047 /*
2048 * Write textdomain() line as a comment.
2049 */
2050 (void) fprintf(fp, "# File:%s, line:%d, textdomain(\"%s\");\n",
2051 p->fname, p->linenum, p->msgid->str);
2052 } /* output_textdomain */
2053
2054 /*
2055 * Prints out comments from linked list.
2056 */
2057 static void
output_comment(FILE * fp,struct strlist_st * p)2058 output_comment(FILE *fp, struct strlist_st *p)
2059 {
2060 if (p == NULL)
2061 return;
2062
2063 /*
2064 * Write comment section.
2065 */
2066 while (p != NULL) {
2067 (void) fprintf(fp, "# %s\n", p->str);
2068 p = p->next;
2069 }
2070 } /* output_comment */
2071
2072 /*
2073 * Prints out msgid along with msgstr.
2074 */
2075 static void
output_msgid(FILE * fp,struct strlist_st * p,int duplicate)2076 output_msgid(FILE *fp, struct strlist_st *p, int duplicate)
2077 {
2078 struct strlist_st *q;
2079
2080 if (p == NULL)
2081 return;
2082
2083 /*
2084 * Write msgid section.
2085 * If duplciate flag is ON, prepend "# " in front of every line
2086 * so that they are considered as comment lines in .po file.
2087 */
2088 if (duplicate == TRUE) {
2089 (void) fprintf(fp, "# ");
2090 }
2091 (void) fprintf(fp, "msgid \"%s\"\n", p->str);
2092 q = p->next;
2093 while (q != NULL) {
2094 if (duplicate == TRUE) {
2095 (void) fprintf(fp, "# ");
2096 }
2097 (void) fprintf(fp, " \"%s\"\n", q->str);
2098 q = q->next;
2099 }
2100
2101 /*
2102 * Write msgstr section.
2103 * if -M option is specified, append <suffix> to msgid.
2104 * if -m option is specified, prepend <prefix> to msgid.
2105 */
2106 if (duplicate == TRUE) {
2107 (void) fprintf(fp, "# ");
2108 }
2109 if ((mflg == TRUE) || (Mflg == TRUE)) {
2110 if (mflg == TRUE) {
2111 /*
2112 * If single line msgid, add suffix to the same line
2113 */
2114 if ((Mflg == TRUE) && (p->next == NULL)) {
2115 /* -M and -m and single line case */
2116 (void) fprintf(fp,
2117 "msgstr \"%s%s%s\"\n",
2118 prefix, p->str, suffix);
2119 } else {
2120 /* -M and -m and multi line case */
2121 (void) fprintf(fp,
2122 "msgstr \"%s%s\"\n",
2123 prefix, p->str);
2124 }
2125 } else {
2126 if ((Mflg == TRUE) && (p->next == NULL)) {
2127 /* -M only with single line case */
2128 (void) fprintf(fp, "msgstr \"%s%s\"\n",
2129 p->str, suffix);
2130 } else {
2131 /* -M only with multi line case */
2132 (void) fprintf(fp, "msgstr \"%s\"\n", p->str);
2133 }
2134 }
2135 q = p->next;
2136 while (q != NULL) {
2137 if (duplicate == TRUE) {
2138 (void) fprintf(fp, "# ");
2139 }
2140 (void) fprintf(fp, " \"%s\"\n", q->str);
2141 q = q->next;
2142 }
2143 /*
2144 * If multi line msgid, add suffix after the last line.
2145 */
2146 if ((Mflg == TRUE) && (p->next != NULL) &&
2147 (suffix[0] != NULL)) {
2148 (void) fprintf(fp, " \"%s\"\n", suffix);
2149 }
2150 } else {
2151 (void) fprintf(fp, "msgstr\n");
2152 }
2153 } /* output_msgid */
2154
2155 /*
2156 * Malloc a new element node and initialize fields.
2157 */
2158 static struct element_st *
new_element(void)2159 new_element(void)
2160 {
2161 struct element_st *tmp;
2162
2163 tmp = (struct element_st *)malloc(sizeof (struct element_st));
2164 tmp->istextdomain = FALSE;
2165 tmp->isduplicate = FALSE;
2166 tmp->msgid = NULL;
2167 tmp->msgstr = NULL;
2168 tmp->comment = NULL;
2169 tmp->fname = NULL;
2170 tmp->linenum = 0;
2171 tmp->next = NULL;
2172
2173 return (tmp);
2174 } /* new_element */
2175
2176 /*
2177 * Malloc a new domain node and initialize fields.
2178 */
2179 static struct domain_st *
new_domain(void)2180 new_domain(void)
2181 {
2182 struct domain_st *tmp;
2183
2184 tmp = (struct domain_st *)malloc(sizeof (struct domain_st));
2185 tmp->dname = NULL;
2186 tmp->gettext_head = NULL;
2187 tmp->gettext_tail = NULL;
2188 tmp->textdomain_head = NULL;
2189 tmp->textdomain_tail = NULL;
2190 tmp->next = NULL;
2191
2192 return (tmp);
2193 } /* new_domain */
2194
2195 /*
2196 * Malloc a new string list node and initialize fields.
2197 */
2198 static struct strlist_st *
new_strlist(void)2199 new_strlist(void)
2200 {
2201 struct strlist_st *tmp;
2202
2203 tmp = (struct strlist_st *)malloc(sizeof (struct strlist_st));
2204 tmp->str = NULL;
2205 tmp->next = NULL;
2206
2207 return (tmp);
2208 } /* new_strlist */
2209
2210 /*
2211 * Malloc a new exclude string list node and initialize fields.
2212 */
2213 static struct exclude_st *
new_exclude(void)2214 new_exclude(void)
2215 {
2216 struct exclude_st *tmp;
2217
2218 tmp = (struct exclude_st *)malloc(sizeof (struct exclude_st));
2219 tmp->exstr = NULL;
2220 tmp->next = NULL;
2221
2222 return (tmp);
2223 } /* new_exclude */
2224
2225 /*
2226 * Local version of strcat to keep within maximum string size.
2227 */
2228 static void
lstrcat(char * s1,const char * s2)2229 lstrcat(char *s1, const char *s2)
2230 {
2231 char *es1 = &s1[MAX_STRING_LEN];
2232 char *ss1 = s1;
2233
2234 while (*s1++)
2235 ;
2236 --s1;
2237 while (*s1++ = *s2++)
2238 if (s1 >= es1) {
2239 s1[-1] = '\0';
2240 if ((in_comment == TRUE || in_quote == TRUE) &&
2241 (warn_linenum != curr_linenum)) {
2242 if (stdin_only == FALSE) {
2243 (void) fprintf(stderr,
2244 "WARNING: file %s line %d exceeds "\
2245 "%d characters: \"%15.15s\"\n",
2246 curr_file, curr_linenum,
2247 MAX_STRING_LEN, ss1);
2248 } else {
2249 (void) fprintf(stderr,
2250 "WARNING: line %d exceeds "\
2251 "%d characters: \"%15.15s\"\n",
2252 curr_linenum, MAX_STRING_LEN, ss1);
2253 }
2254 warn_linenum = curr_linenum;
2255 }
2256 break;
2257 }
2258 } /* lstrcat */
2259
2260 #ifdef DEBUG
2261 /*
2262 * Debug print routine. Compiled only with DEBUG on.
2263 */
2264 void
print_element_list(struct element_st * q)2265 print_element_list(struct element_st *q)
2266 {
2267 struct strlist_st *r;
2268
2269 while (q != NULL) {
2270 printf(" istextdomain = %d\n", q->istextdomain);
2271 printf(" isduplicate = %d\n", q->isduplicate);
2272 if ((q->msgid != NULL) && (q->msgid->str != NULL)) {
2273 printf(" msgid = <%s>\n", q->msgid->str);
2274 r = q->msgid->next;
2275 while (r != NULL) {
2276 printf(" <%s>\n", r->str);
2277 r = r->next;
2278 }
2279 } else {
2280 printf(" msgid = <NULL>\n");
2281 }
2282 if ((q->msgstr != NULL) && (q->msgstr->str != NULL)) {
2283 printf(" msgstr= <%s>\n", q->msgstr->str);
2284 r = q->msgstr->next;
2285 while (r != NULL) {
2286 printf(" <%s>\n", r->str);
2287 r = r->next;
2288 }
2289 } else {
2290 printf(" msgstr= <NULL>\n");
2291 }
2292
2293 if (q->comment == NULL) {
2294 printf(" comment = <NULL>\n");
2295 } else {
2296 printf(" comment = <%s>\n", q->comment->str);
2297 r = q->comment->next;
2298 while (r != NULL) {
2299 printf(" <%s>\n", r->str);
2300 r = r->next;
2301 }
2302 }
2303
2304 if (q->fname == NULL) {
2305 printf(" fname = <NULL>\n");
2306 } else {
2307 printf(" fname = <%s>\n", q->fname);
2308 }
2309 printf(" linenum = %d\n", q->linenum);
2310 printf("\n");
2311 q = q->next;
2312 }
2313 }
2314
2315 /*
2316 * Debug print routine. Compiled only with DEBUG on.
2317 */
2318 void
print_one_domain(struct domain_st * p)2319 print_one_domain(struct domain_st *p)
2320 {
2321 struct element_st *q;
2322
2323 if (p == NULL) {
2324 printf("domain pointer = <NULL>\n");
2325 return;
2326 } else if (p->dname == NULL) {
2327 printf("domain_name = <%s>\n", "<NULL>");
2328 } else {
2329 printf("domain_name = <%s>\n", p->dname);
2330 }
2331 q = p->gettext_head;
2332 print_element_list(q);
2333
2334 q = p->textdomain_head;
2335 print_element_list(q);
2336 } /* print_one_domain */
2337
2338 void
print_all_domain(struct domain_st * dom_list)2339 print_all_domain(struct domain_st *dom_list)
2340 {
2341 struct domain_st *p;
2342 struct element_st *q;
2343
2344 p = dom_list;
2345 while (p != NULL) {
2346 print_one_domain(p);
2347 p = p->next;
2348 } /* while */
2349 } /* print_all_domain */
2350 #endif
2351