1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 1991, 1999, 2001-2002 Sun Microsystems, Inc. 24 * All rights reserved. 25 * Use is subject to license terms. 26 */ 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <ctype.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 34 #define TRUE 1 35 #define FALSE 0 36 #define MAX_PATH_LEN 1024 37 #define MAX_DOMAIN_LEN 1024 38 #define MAX_STRING_LEN 2048 39 40 #define USAGE "Usage: xgettext [-a [-x exclude-file]] [-jns]\ 41 [-c comment-tag]\n [-d default-domain] [-m prefix] \ 42 [-M suffix] [-p pathname] files ...\n\ 43 xgettext -h\n" 44 45 #define DEFAULT_DOMAIN "messages" 46 47 extern char yytext[]; 48 extern int yylex(void); 49 50 /* 51 * Linked list of strings used to store one ANSI-C style string. 52 * Each double quoted string is stored in its own node. 53 */ 54 struct strlist_st { 55 char *str; 56 struct strlist_st *next; 57 }; 58 59 /* 60 * istextdomain : Boolean telling if this node contains textdomain call. 61 * isduplicate : Boolean telling if this node duplicate of any other msgid. 62 * msgid : contains msgid or textdomain if istextdomain is true. 
63 * msgstr : contains msgstr. 64 * comment : comment extracted in case of -c option. 65 * fname : tells which file contains msgid. 66 * linenum : line number in the file. 67 * next : Next node. 68 */ 69 struct element_st { 70 char istextdomain; 71 char isduplicate; 72 struct strlist_st *msgid; 73 struct strlist_st *msgstr; 74 struct strlist_st *comment; 75 char *fname; 76 int linenum; 77 struct element_st *next; 78 }; 79 80 /* 81 * dname : domain name. NULL if default domain. 82 * gettext_head : Head of linked list containing [d]gettext(). 83 * gettext_tail : Tail of linked list containing [d]gettext(). 84 * textdomain_head : Head of linked list containing textdomain(). 85 * textdomain_tail : Tail of linked list containing textdomain(). 86 * next : Next node. 87 * 88 * Each domain contains two linked lists. 89 * (gettext_head, textdomain_head) 90 * If -s option is used, then textdomain_head contains all 91 * textdomain() calls and no textdomain() calls are stored in gettext_head. 92 * If -s option is not used, textdomain_head is an empty list and 93 * gettext_head contains all gettext(), dgettext(), and textdomain() calls. 94 */ 95 struct domain_st { 96 char *dname; 97 struct element_st *gettext_head; 98 struct element_st *gettext_tail; 99 struct element_st *textdomain_head; 100 struct element_st *textdomain_tail; 101 struct domain_st *next; 102 }; 103 104 /* 105 * There are two domain linked lists. 106 * def_dom contains default domain linked list and 107 * dom_head contains all other domain linked lists to be created by 108 * dgettext() calls. 109 */ 110 static struct domain_st *def_dom = NULL; 111 static struct domain_st *dom_head = NULL; 112 static struct domain_st *dom_tail = NULL; 113 114 /* 115 * This linked list contains a list of strings to be excluded when 116 * -x option is used. 
117 */ 118 static struct exclude_st { 119 struct strlist_st *exstr; 120 struct exclude_st *next; 121 } *excl_head; 122 123 /* 124 * All option flags and values for each option if any. 125 */ 126 static int aflg = FALSE; /* -a */ 127 static int cflg = FALSE; /* -c */ 128 static char *comment_tag = NULL; /* argument of -c */ 129 static char *default_domain = NULL; /* argument of -d, else DEFAULT_DOMAIN */ 130 static int hflg = FALSE; /* -h */ 131 static int jflg = FALSE; /* -j */ 132 static int mflg = FALSE; /* -m */ 133 static int Mflg = FALSE; /* -M */ 134 static char *suffix = NULL; /* argument of -M */ 135 static char *prefix = NULL; /* argument of -m */ 136 static int nflg = FALSE; /* -n */ 137 static int pflg = FALSE; /* -p */ 138 static char *pathname = NULL; /* argument of -p */ 139 static int sflg = FALSE; /* -s */ 140 static int tflg = FALSE; /* Undocumented option to extract dcgettext */ 141 static int xflg = FALSE; /* -x */ 142 static char *exclude_file = NULL; /* argument of -x */ 143 144 /* 145 * Each variable shows the current state of parsing input file. 146 * 147 * in_comment : Means inside comment block (C or C++). 148 * in_cplus_comment : Means inside C++ comment block. 149 * in_gettext : Means inside gettext call. 150 * in_dgettext : Means inside dgettext call. 151 * in_dcgettext : Means inside dcgettext call. 152 * in_textdomain : Means inside textdomain call. 153 * in_str : Means currently processing ANSI style string. 154 * in_quote : Means currently processing double quoted string. 155 * in_skippable_string : Means currently processing double quoted string, 156 * that occurs outside a call to gettext, dgettext, 157 * dcgettext, textdomain, with -a not specified. 158 * is_last_comment_line : Means the current line is the last line 159 * of the comment block. This is necessary because 160 * in_comment becomes FALSE when '* /' is encountered. 161 * is_first_comma_found : This is used only for dcgettext because dcgettext() 162 * requires 2 commas. So a different action is needed 163 * depending on which comma is encountered. 
164 * num_nested_open_paren : This keeps track of the number of open parens to 165 * handle dcgettext ((const char *)0,"msg",LC_TIME); 166 */ 167 static int in_comment = FALSE; 168 static int in_cplus_comment = FALSE; 169 static int in_gettext = FALSE; 170 static int in_dgettext = FALSE; 171 static int in_dcgettext = FALSE; 172 static int in_textdomain = FALSE; 173 static int in_str = FALSE; 174 static int in_quote = FALSE; 175 static int is_last_comment_line = FALSE; 176 static int is_first_comma_found = FALSE; 177 static int in_skippable_string = FALSE; 178 static int num_nested_open_paren = 0; 179 180 /* 181 * This variable contains the first line of gettext(), dgettext(), or 182 * textdomain() calls. 183 * This is necessary for multiple lines of a single call to store 184 * the starting line. 185 */ 186 static int linenum_saved = 0; 187 188 int stdin_only = FALSE; /* Read input from stdin */ 189 190 /* 191 * curr_file : Contains current file name processed. 192 * curr_domain : Contains the current domain for each dgettext(). 193 * This is NULL for gettext(). 194 * curr_line : Contains the current line processed. 195 * qstring_buf : Contains the double quoted string processed. 196 * curr_linenum : Line number being processed in the current input file. 197 * warn_linenum : Line number of current warning message. 198 */ 199 char curr_file[MAX_PATH_LEN]; 200 static char curr_domain[MAX_DOMAIN_LEN]; 201 static char curr_line[MAX_STRING_LEN]; 202 static char qstring_buf[MAX_STRING_LEN]; 203 int curr_linenum = 1; 204 int warn_linenum = 0; 205 206 /* 207 * strhead : This list contains ANSI style string. 208 * Each node contains double quoted string. 209 * strtail : This is the tail of strhead. 210 * commhead : This list contains comments string. 211 * Each node contains one line of comment. 212 * commtail : This is the tail of commhead. 
213 */ 214 static struct strlist_st *strhead = NULL; 215 static struct strlist_st *strtail = NULL; 216 static struct strlist_st *commhead = NULL; 217 static struct strlist_st *commtail = NULL; 218 219 /* 220 * gargc : Same as argc. Used to pass argc to lex routine. 221 * gargv : Same as argv. Used to pass argv to lex routine. 222 */ 223 int gargc; 224 char **gargv; 225 226 static void add_line_to_comment(void); 227 static void add_qstring_to_str(void); 228 static void add_str_to_element_list(int, char *); 229 static void copy_strlist_to_str(char *, struct strlist_st *); 230 static void end_ansi_string(void); 231 static void free_strlist(struct strlist_st *); 232 void handle_newline(void); 233 static void initialize_globals(void); 234 static void output_comment(FILE *, struct strlist_st *); 235 static void output_msgid(FILE *, struct strlist_st *, int); 236 static void output_textdomain(FILE *, struct element_st *); 237 static void print_help(void); 238 static void read_exclude_file(void); 239 static void trim_line(char *); 240 static void write_all_files(void); 241 static void write_one_file(struct domain_st *); 242 243 static void lstrcat(char *, const char *); 244 245 /* 246 * Utility functions to malloc a node and initialize fields. 247 */ 248 static struct domain_st *new_domain(void); 249 static struct strlist_st *new_strlist(void); 250 static struct element_st *new_element(void); 251 static struct exclude_st *new_exclude(void); 252 253 /* 254 * Main program of xgettext. 
255 */ 256 int 257 main(int argc, char **argv) 258 { 259 int opterr = FALSE; 260 int c; 261 262 initialize_globals(); 263 264 while ((c = getopt(argc, argv, "jhax:nsc:d:m:M:p:t")) != EOF) { 265 switch (c) { 266 case 'a': 267 aflg = TRUE; 268 break; 269 case 'c': 270 cflg = TRUE; 271 comment_tag = optarg; 272 break; 273 case 'd': 274 default_domain = optarg; 275 break; 276 case 'h': 277 hflg = TRUE; 278 break; 279 case 'j': 280 jflg = TRUE; 281 break; 282 case 'M': 283 Mflg = TRUE; 284 suffix = optarg; 285 break; 286 case 'm': 287 mflg = TRUE; 288 prefix = optarg; 289 break; 290 case 'n': 291 nflg = TRUE; 292 break; 293 case 'p': 294 pflg = TRUE; 295 pathname = optarg; 296 break; 297 case 's': 298 sflg = TRUE; 299 break; 300 case 't': 301 tflg = TRUE; 302 break; 303 case 'x': 304 xflg = TRUE; 305 exclude_file = optarg; 306 break; 307 case '?': 308 opterr = TRUE; 309 break; 310 } 311 } 312 313 /* if -h is used, ignore all other options. */ 314 if (hflg == TRUE) { 315 (void) fprintf(stderr, USAGE); 316 print_help(); 317 exit(0); 318 } 319 320 /* -x can be used only with -a */ 321 if ((xflg == TRUE) && (aflg == FALSE)) 322 opterr = TRUE; 323 324 /* -j cannot be used with -a */ 325 if ((jflg == TRUE) && (aflg == TRUE)) { 326 (void) fprintf(stderr, 327 "-a and -j options cannot be used together.\n"); 328 opterr = TRUE; 329 } 330 331 /* -j cannot be used with -s */ 332 if ((jflg == TRUE) && (sflg == TRUE)) { 333 (void) fprintf(stderr, 334 "-j and -s options cannot be used together.\n"); 335 opterr = TRUE; 336 } 337 338 if (opterr == TRUE) { 339 (void) fprintf(stderr, USAGE); 340 exit(2); 341 } 342 343 /* error, if no files are specified. 
*/ 344 if (optind == argc) { 345 (void) fprintf(stderr, USAGE); 346 exit(2); 347 } 348 349 if (xflg == TRUE) { 350 read_exclude_file(); 351 } 352 353 /* If files are -, then read from stdin */ 354 if (argv[optind][0] == '-') { 355 stdin_only = TRUE; 356 optind++; 357 } else { 358 stdin_only = FALSE; 359 } 360 361 /* Store argc and argv to pass to yylex() */ 362 gargc = argc; 363 gargv = argv; 364 365 #ifdef DEBUG 366 (void) printf("optind=%d\n", optind); 367 { 368 int i = optind; 369 for (; i < argc; i++) { 370 (void) printf(" %d, <%s>\n", i, argv[i]); 371 } 372 } 373 #endif 374 375 if (stdin_only == FALSE) { 376 if (freopen(argv[optind], "r", stdin) == NULL) { 377 (void) fprintf(stderr, 378 "ERROR, can't open input file: %s\n", argv[optind]); 379 exit(2); 380 } 381 (void) strcpy(curr_file, gargv[optind]); 382 optind++; 383 } 384 385 /* 386 * Process input. 387 */ 388 (void) yylex(); 389 390 #ifdef DEBUG 391 printf("\n======= default_domain ========\n"); 392 print_one_domain(def_dom); 393 printf("======= domain list ========\n"); 394 print_all_domain(dom_head); 395 #endif 396 397 /* 398 * Write out all .po files. 399 */ 400 write_all_files(); 401 402 return (0); 403 } /* main */ 404 405 /* 406 * Prints help information for each option. 
407 */ 408 static void 409 print_help(void) 410 { 411 (void) fprintf(stderr, "\n"); 412 (void) fprintf(stderr, 413 "-a\t\t\tfind ALL strings\n"); 414 (void) fprintf(stderr, 415 "-c <comment-tag>\tget comments containing <flag>\n"); 416 (void) fprintf(stderr, 417 "-d <default-domain>\tuse <default-domain> for default domain\n"); 418 (void) fprintf(stderr, 419 "-h\t\t\tHelp\n"); 420 (void) fprintf(stderr, 421 "-j\t\t\tupdate existing file with the current result\n"); 422 (void) fprintf(stderr, 423 "-M <suffix>\t\tfill in msgstr with msgid<suffix>\n"); 424 (void) fprintf(stderr, 425 "-m <prefix>\t\tfill in msgstr with <prefix>msgid\n"); 426 (void) fprintf(stderr, 427 "-n\t\t\tline# file name and line number info in output\n"); 428 (void) fprintf(stderr, 429 "-p <pathname>\t\tuse <pathname> for output file directory\n"); 430 (void) fprintf(stderr, 431 "-s\t\t\tgenerate sorted output files\n"); 432 (void) fprintf(stderr, 433 "-x <exclude-file>\texclude strings in file <exclude-file> from output\n"); 434 (void) fprintf(stderr, 435 "-\t\t\tread stdin, use as a filter (input only)\n"); 436 } /* print_help */ 437 438 /* 439 * Extract file name and line number information from macro line 440 * and set the global variable accordingly. 441 * The valid line format is 442 * 1) # nnn 443 * or 444 * 2) # nnn "xxxxx" 445 * where nnn is line number and xxxxx is file name. 446 */ 447 static void 448 extract_filename_linenumber(char *mline) 449 { 450 int num; 451 char *p, *q, *r; 452 453 /* 454 * mline can contain multi newline. 455 * line number should be increased by the number of newlines. 
456 */ 457 p = mline; 458 while ((p = strchr(p, '\n')) != NULL) { 459 p++; 460 curr_linenum++; 461 } 462 p = strchr(mline, ' '); 463 if (p == NULL) 464 return; 465 q = strchr(++p, ' '); 466 if (q == NULL) { 467 /* case 1 */ 468 if ((num = atoi(p)) > 0) { 469 curr_linenum = num; 470 return; 471 } 472 } else { 473 /* case 2 */ 474 *q++ = 0; 475 if (*q == '"') { 476 q++; 477 r = strchr(q, '"'); 478 if (r == NULL) { 479 return; 480 } 481 *r = 0; 482 if ((num = atoi(p)) > 0) { 483 curr_linenum = num; 484 (void) strcpy(curr_file, q); 485 } 486 } 487 } 488 } /* extract_filename_linenumber */ 489 490 /* 491 * Handler for MACRO line which starts with #. 492 */ 493 void 494 handle_macro_line(void) 495 { 496 #ifdef DEBUG 497 (void) printf("Macro line=<%s>\n", yytext); 498 #endif 499 if (cflg == TRUE) 500 lstrcat(curr_line, yytext); 501 502 if (in_quote == TRUE) { 503 lstrcat(qstring_buf, yytext); 504 } else if (in_comment == FALSE) { 505 extract_filename_linenumber(yytext); 506 } 507 508 curr_linenum--; 509 handle_newline(); 510 } /* handle_macro_line */ 511 512 /* 513 * Handler for C++ comments which starts with //. 514 */ 515 void 516 handle_cplus_comment_line(void) 517 { 518 if (cflg == TRUE) 519 lstrcat(curr_line, yytext); 520 521 if (in_quote == TRUE) { 522 lstrcat(qstring_buf, yytext); 523 } else if ((in_comment == FALSE) && 524 (in_skippable_string == FALSE)) { 525 526 /* 527 * If already in c comments, don't do anything. 528 * Set both flags to TRUE here. 529 * Both flags will be set to FALSE when newline 530 * encounters. 531 */ 532 in_cplus_comment = TRUE; 533 in_comment = TRUE; 534 } 535 } /* handle_cplus_comment_line */ 536 537 /* 538 * Handler for the comment start (slash asterisk) in input file. 
539 */ 540 void 541 handle_open_comment(void) 542 { 543 if (cflg == TRUE) 544 lstrcat(curr_line, yytext); 545 546 if (in_quote == TRUE) { 547 lstrcat(qstring_buf, yytext); 548 } else if ((in_comment == FALSE) && 549 (in_skippable_string == FALSE)) { 550 551 in_comment = TRUE; 552 is_last_comment_line = FALSE; 553 /* 554 * If there is any comment extracted before accidently, 555 * clean it up and start the new comment again. 556 */ 557 free_strlist(commhead); 558 commhead = commtail = NULL; 559 } 560 } 561 562 /* 563 * Handler for the comment end (asterisk slash) in input file. 564 */ 565 void 566 handle_close_comment(void) 567 { 568 if (cflg == TRUE) 569 lstrcat(curr_line, yytext); 570 571 if (in_quote == TRUE) { 572 lstrcat(qstring_buf, yytext); 573 } else if (in_skippable_string == FALSE) { 574 in_comment = FALSE; 575 is_last_comment_line = TRUE; 576 } 577 } 578 579 /* 580 * Handler for "gettext" in input file. 581 */ 582 void 583 handle_gettext(void) 584 { 585 /* 586 * If -t option is specified to extrct dcgettext, 587 * don't do anything for gettext(). 588 */ 589 if (tflg == TRUE) { 590 return; 591 } 592 593 num_nested_open_paren = 0; 594 595 if (cflg == TRUE) 596 lstrcat(curr_line, yytext); 597 598 if (in_quote == TRUE) { 599 lstrcat(qstring_buf, yytext); 600 } else if (in_comment == FALSE) { 601 in_gettext = TRUE; 602 linenum_saved = curr_linenum; 603 /* 604 * gettext will be put into default domain .po file 605 * curr_domain does not change for gettext. 606 */ 607 curr_domain[0] = NULL; 608 } 609 } /* handle_gettext */ 610 611 /* 612 * Handler for "dgettext" in input file. 613 */ 614 void 615 handle_dgettext(void) 616 { 617 /* 618 * If -t option is specified to extrct dcgettext, 619 * don't do anything for dgettext(). 
620 */ 621 if (tflg == TRUE) { 622 return; 623 } 624 625 num_nested_open_paren = 0; 626 627 if (cflg == TRUE) 628 lstrcat(curr_line, yytext); 629 630 if (in_quote == TRUE) { 631 lstrcat(qstring_buf, yytext); 632 } else if (in_comment == FALSE) { 633 in_dgettext = TRUE; 634 linenum_saved = curr_linenum; 635 /* 636 * dgettext will be put into domain file specified. 637 * curr_domain will follow. 638 */ 639 curr_domain[0] = NULL; 640 } 641 } /* handle_dgettext */ 642 643 /* 644 * Handler for "dcgettext" in input file. 645 */ 646 void 647 handle_dcgettext(void) 648 { 649 /* 650 * dcgettext will be extracted only when -t flag is specified. 651 */ 652 if (tflg == FALSE) { 653 return; 654 } 655 656 num_nested_open_paren = 0; 657 658 is_first_comma_found = FALSE; 659 660 if (cflg == TRUE) 661 lstrcat(curr_line, yytext); 662 663 if (in_quote == TRUE) { 664 lstrcat(qstring_buf, yytext); 665 } else if (in_comment == FALSE) { 666 in_dcgettext = TRUE; 667 linenum_saved = curr_linenum; 668 /* 669 * dcgettext will be put into domain file specified. 670 * curr_domain will follow. 671 */ 672 curr_domain[0] = NULL; 673 } 674 } /* handle_dcgettext */ 675 676 /* 677 * Handler for "textdomain" in input file. 678 */ 679 void 680 handle_textdomain(void) 681 { 682 if (cflg == TRUE) 683 lstrcat(curr_line, yytext); 684 685 if (in_quote == TRUE) { 686 lstrcat(qstring_buf, yytext); 687 } else if (in_comment == FALSE) { 688 in_textdomain = TRUE; 689 linenum_saved = curr_linenum; 690 curr_domain[0] = NULL; 691 } 692 } /* handle_textdomain */ 693 694 /* 695 * Handler for '(' in input file. 
696 */ 697 void 698 handle_open_paren(void) 699 { 700 if (cflg == TRUE) 701 lstrcat(curr_line, yytext); 702 703 if (in_quote == TRUE) { 704 lstrcat(qstring_buf, yytext); 705 } else if (in_comment == FALSE) { 706 if ((in_gettext == TRUE) || 707 (in_dgettext == TRUE) || 708 (in_dcgettext == TRUE) || 709 (in_textdomain == TRUE)) { 710 in_str = TRUE; 711 num_nested_open_paren++; 712 } 713 } 714 } /* handle_open_paren */ 715 716 /* 717 * Handler for ')' in input file. 718 */ 719 void 720 handle_close_paren(void) 721 { 722 if (cflg == TRUE) 723 lstrcat(curr_line, yytext); 724 725 if (in_quote == TRUE) { 726 lstrcat(qstring_buf, yytext); 727 } else if (in_comment == FALSE) { 728 if ((in_gettext == TRUE) || 729 (in_dgettext == TRUE) || 730 (in_dcgettext == TRUE) || 731 (in_textdomain == TRUE)) { 732 /* 733 * If this is not the matching close paren with 734 * the first open paren, no action is necessary. 735 */ 736 if (--num_nested_open_paren > 0) 737 return; 738 add_str_to_element_list(in_textdomain, curr_domain); 739 in_str = FALSE; 740 in_gettext = FALSE; 741 in_dgettext = FALSE; 742 in_dcgettext = FALSE; 743 in_textdomain = FALSE; 744 } else if (aflg == TRUE) { 745 end_ansi_string(); 746 } 747 } 748 } /* handle_close_paren */ 749 750 /* 751 * Handler for '\\n' in input file. 752 * 753 * This is a '\' followed by new line. 754 * This can be treated like a new line except when this is a continuation 755 * of a ANSI-C string. 756 * If this is a part of ANSI string, treat the current line as a double 757 * quoted string and the next line is the start of the double quoted 758 * string. 
759 */ 760 void 761 handle_esc_newline(void) 762 { 763 if (cflg == TRUE) 764 lstrcat(curr_line, "\\"); 765 766 curr_linenum++; 767 768 if (in_quote == TRUE) { 769 add_qstring_to_str(); 770 } else if ((in_comment == TRUE) || 771 (is_last_comment_line == TRUE)) { 772 if (in_cplus_comment == FALSE) { 773 add_line_to_comment(); 774 } 775 } 776 777 curr_line[0] = NULL; 778 } /* handle_esc_newline */ 779 780 /* 781 * Handler for '"' in input file. 782 */ 783 void 784 handle_quote(void) 785 { 786 if (cflg == TRUE) 787 lstrcat(curr_line, yytext); 788 789 if (in_comment == TRUE) { 790 /*EMPTY*/ 791 } else if ((in_gettext == TRUE) || 792 (in_dgettext == TRUE) || 793 (in_dcgettext == TRUE) || 794 (in_textdomain == TRUE)) { 795 if (in_str == TRUE) { 796 if (in_quote == FALSE) { 797 in_quote = TRUE; 798 } else { 799 add_qstring_to_str(); 800 in_quote = FALSE; 801 } 802 } 803 } else if (aflg == TRUE) { 804 /* 805 * The quote is found outside of gettext, dgetext, and 806 * textdomain. Everytime a quoted string is found, 807 * add it to the string list. 808 * in_str stays TRUE until ANSI string ends. 809 */ 810 if (in_str == TRUE) { 811 if (in_quote == TRUE) { 812 in_quote = FALSE; 813 add_qstring_to_str(); 814 } else { 815 in_quote = TRUE; 816 } 817 } else { 818 in_str = TRUE; 819 in_quote = TRUE; 820 linenum_saved = curr_linenum; 821 } 822 } else { 823 in_skippable_string = (in_skippable_string == TRUE) ? 824 FALSE : TRUE; 825 } 826 } /* handle_quote */ 827 828 /* 829 * Handler for ' ' or TAB in input file. 830 */ 831 void 832 handle_spaces(void) 833 { 834 if (cflg == TRUE) 835 lstrcat(curr_line, yytext); 836 837 if (in_quote == TRUE) { 838 lstrcat(qstring_buf, yytext); 839 } 840 } /* handle_spaces */ 841 842 /* 843 * Flattens a linked list containing ANSI string to the one string. 
844 */ 845 static void 846 copy_strlist_to_str(char *str, struct strlist_st *strlist) 847 { 848 struct strlist_st *p; 849 850 str[0] = NULL; 851 852 if (strlist != NULL) { 853 p = strlist; 854 while (p != NULL) { 855 if (p->str != NULL) { 856 lstrcat(str, p->str); 857 } 858 p = p->next; 859 } 860 } 861 } /* copy_strlist_to_str */ 862 863 /* 864 * Handler for ',' in input file. 865 */ 866 void 867 handle_comma(void) 868 { 869 if (cflg == TRUE) 870 lstrcat(curr_line, yytext); 871 872 if (in_quote == TRUE) { 873 lstrcat(qstring_buf, yytext); 874 } else if (in_comment == FALSE) { 875 if (in_str == TRUE) { 876 if (in_dgettext == TRUE) { 877 copy_strlist_to_str(curr_domain, strhead); 878 free_strlist(strhead); 879 strhead = strtail = NULL; 880 } else if (in_dcgettext == TRUE) { 881 /* 882 * Ignore the second comma. 883 */ 884 if (is_first_comma_found == FALSE) { 885 copy_strlist_to_str(curr_domain, 886 strhead); 887 free_strlist(strhead); 888 strhead = strtail = NULL; 889 is_first_comma_found = TRUE; 890 } 891 } else if (aflg == TRUE) { 892 end_ansi_string(); 893 } 894 } 895 } 896 } /* handle_comma */ 897 898 /* 899 * Handler for any other character that does not have special handler. 900 */ 901 void 902 handle_character(void) 903 { 904 if (cflg == TRUE) 905 lstrcat(curr_line, yytext); 906 907 if (in_quote == TRUE) { 908 lstrcat(qstring_buf, yytext); 909 } else if (in_comment == FALSE) { 910 if (in_str == TRUE) { 911 if (aflg == TRUE) { 912 end_ansi_string(); 913 } 914 } 915 } 916 } /* handle_character */ 917 918 /* 919 * Handler for new line in input file. 920 */ 921 void 922 handle_newline(void) 923 { 924 curr_linenum++; 925 926 /* 927 * in_quote is always FALSE here for ANSI-C code. 
928 */ 929 if ((in_comment == TRUE) || 930 (is_last_comment_line == TRUE)) { 931 if (in_cplus_comment == TRUE) { 932 in_cplus_comment = FALSE; 933 in_comment = FALSE; 934 } else { 935 add_line_to_comment(); 936 } 937 } 938 939 curr_line[0] = NULL; 940 /* 941 * C++ comment always ends with new line. 942 */ 943 } /* handle_newline */ 944 945 /* 946 * Process ANSI string. 947 */ 948 static void 949 end_ansi_string(void) 950 { 951 if ((aflg == TRUE) && 952 (in_str == TRUE) && 953 (in_gettext == FALSE) && 954 (in_dgettext == FALSE) && 955 (in_dcgettext == FALSE) && 956 (in_textdomain == FALSE)) { 957 add_str_to_element_list(FALSE, curr_domain); 958 in_str = FALSE; 959 } 960 } /* end_ansi_string */ 961 962 /* 963 * Initialize global variables if necessary. 964 */ 965 static void 966 initialize_globals(void) 967 { 968 default_domain = strdup(DEFAULT_DOMAIN); 969 curr_domain[0] = NULL; 970 curr_file[0] = NULL; 971 qstring_buf[0] = NULL; 972 } /* initialize_globals() */ 973 974 /* 975 * Extract only string part when read a exclude file by removing 976 * keywords (e.g. msgid, msgstr, # ) and heading and trailing blanks and 977 * double quotes. 978 */ 979 static void 980 trim_line(char *line) 981 { 982 int i, p, len; 983 int first = 0; 984 int last = 0; 985 char c; 986 987 len = strlen(line); 988 989 /* 990 * Find the position of the last non-whitespace character. 991 */ 992 i = len - 1; 993 /*CONSTCOND*/ 994 while (1) { 995 c = line[i--]; 996 if ((c != ' ') && (c != '\n') && (c != '\t')) { 997 last = ++i; 998 break; 999 } 1000 } 1001 1002 /* 1003 * Find the position of the first non-whitespace character 1004 * by skipping "msgid" initially. 
1005 */ 1006 if (strncmp("msgid ", line, 6) == 0) { 1007 i = 5; 1008 } else if (strncmp("msgstr ", line, 7) == 0) { 1009 i = 6; 1010 } else if (strncmp("# ", line, 2) == 0) { 1011 i = 2; 1012 } else { 1013 i = 0; 1014 } 1015 1016 /*CONSTCOND*/ 1017 while (1) { 1018 c = line[i++]; 1019 if ((c != ' ') && (c != '\n') && (c != '\t')) { 1020 first = --i; 1021 break; 1022 } 1023 } 1024 1025 /* 1026 * For Backward compatibility, we consider both double quoted 1027 * string and non-quoted string. 1028 * The double quote is removed before being stored if exists. 1029 */ 1030 if (line[first] == '"') { 1031 first++; 1032 } 1033 if (line[last] == '"') { 1034 last--; 1035 } 1036 1037 /* 1038 * Now copy the valid part of the string. 1039 */ 1040 p = first; 1041 for (i = 0; i <= (last-first); i++) { 1042 line[i] = line[p++]; 1043 } 1044 line [i] = NULL; 1045 } /* trim_line */ 1046 1047 /* 1048 * Read exclude file and stores it in the global linked list. 1049 */ 1050 static void 1051 read_exclude_file(void) 1052 { 1053 FILE *fp; 1054 struct exclude_st *tmp_excl; 1055 struct strlist_st *tail; 1056 int ignore_line; 1057 char line [MAX_STRING_LEN]; 1058 1059 if ((fp = fopen(exclude_file, "r")) == NULL) { 1060 (void) fprintf(stderr, "ERROR, can't open exclude file: %s\n", 1061 exclude_file); 1062 exit(2); 1063 } 1064 1065 ignore_line = TRUE; 1066 while (fgets(line, MAX_STRING_LEN, fp) != NULL) { 1067 /* 1068 * Line starting with # is a comment line and ignored. 1069 * Blank line is ignored, too. 
1070 */ 1071 if ((line[0] == '\n') || (line[0] == '#')) { 1072 continue; 1073 } else if (strncmp(line, "msgstr", 6) == 0) { 1074 ignore_line = TRUE; 1075 } else if (strncmp(line, "domain", 6) == 0) { 1076 ignore_line = TRUE; 1077 } else if (strncmp(line, "msgid", 5) == 0) { 1078 ignore_line = FALSE; 1079 tmp_excl = new_exclude(); 1080 tmp_excl->exstr = new_strlist(); 1081 trim_line(line); 1082 tmp_excl->exstr->str = strdup(line); 1083 tail = tmp_excl->exstr; 1084 /* 1085 * Prepend new exclude string node to the list. 1086 */ 1087 tmp_excl->next = excl_head; 1088 excl_head = tmp_excl; 1089 } else { 1090 /* 1091 * If more than one line of string forms msgid, 1092 * append it to the string linked list. 1093 */ 1094 if (ignore_line == FALSE) { 1095 trim_line(line); 1096 tail->next = new_strlist(); 1097 tail->next->str = strdup(line); 1098 tail = tail->next; 1099 } 1100 } 1101 } /* while */ 1102 1103 #ifdef DEBUG 1104 tmp_excl = excl_head; 1105 while (tmp_excl != NULL) { 1106 printf("============================\n"); 1107 tail = tmp_excl->exstr; 1108 while (tail != NULL) { 1109 printf("%s###\n", tail->str); 1110 tail = tail->next; 1111 } 1112 tmp_excl = tmp_excl->next; 1113 } 1114 #endif 1115 } /* read_exclude_file */ 1116 1117 /* 1118 * Get next character from the string list containing ANSI style string. 1119 * This function returns three valus. (p, *m, *c). 1120 * p is returned by return value and, *m and *c are returned by changing 1121 * values in the location pointed. 1122 * 1123 * p : points node in the linked list for ANSI string. 1124 * Each node contains double quoted string. 1125 * m : The location of the next characters in the double quoted string 1126 * as integer index in the string. 1127 * When it gets to end of quoted string, the next node will be 1128 * read and m starts as zero for every new node. 1129 * c : Stores the value of the characterto be returned. 
1130 */ 1131 static struct strlist_st * 1132 get_next_ch(struct strlist_st *p, int *m, char *c) 1133 { 1134 char ch, oct, hex; 1135 int value, i; 1136 1137 /* 1138 * From the string list, find non-null string first. 1139 */ 1140 1141 /*CONSTCOND*/ 1142 while (1) { 1143 if (p == NULL) { 1144 break; 1145 } else if (p->str == NULL) { 1146 p = p->next; 1147 } else if (p->str[*m] == NULL) { 1148 p = p->next; 1149 *m = 0; 1150 } else { 1151 break; 1152 } 1153 } 1154 1155 /* 1156 * No more character is available. 1157 */ 1158 if (p == NULL) { 1159 *c = 0; 1160 return (NULL); 1161 } 1162 1163 /* 1164 * Check if the character back slash. 1165 * If yes, ANSI defined escape sequence rule is used. 1166 */ 1167 if (p->str[*m] != '\\') { 1168 *c = p->str[*m]; 1169 *m = *m + 1; 1170 return (p); 1171 } else { 1172 /* 1173 * Get next character after '\'. 1174 */ 1175 *m = *m + 1; 1176 ch = p->str[*m]; 1177 switch (ch) { 1178 case 'a': 1179 *c = '\a'; 1180 break; 1181 case 'b': 1182 *c = '\b'; 1183 break; 1184 case 'f': 1185 *c = '\f'; 1186 break; 1187 case 'n': 1188 *c = '\n'; 1189 break; 1190 case 'r': 1191 *c = '\r'; 1192 break; 1193 case 't': 1194 *c = '\t'; 1195 break; 1196 case 'v': 1197 *c = '\v'; 1198 break; 1199 case '0': 1200 case '1': 1201 case '2': 1202 case '3': 1203 case '4': 1204 case '5': 1205 case '6': 1206 case '7': 1207 /* 1208 * Get maximum of three octal digits. 1209 */ 1210 value = ch; 1211 for (i = 0; i < 2; i++) { 1212 *m = *m + 1; 1213 oct = p->str[*m]; 1214 if ((oct >= '0') && (oct <= '7')) { 1215 value = value * 8 + (oct - '0'); 1216 } else { 1217 *m = *m - 1; 1218 break; 1219 } 1220 } 1221 *c = value; 1222 #ifdef DEBUG 1223 /* (void) fprintf(stderr, "octal=%d\n", value); */ 1224 #endif 1225 break; 1226 case 'x': 1227 value = 0; 1228 /* 1229 * Remove all heading zeros first and 1230 * get one or two valuid hexadecimal charaters. 
1231 */ 1232 *m = *m + 1; 1233 while (p->str[*m] == '0') { 1234 *m = *m + 1; 1235 } 1236 value = 0; 1237 for (i = 0; i < 2; i++) { 1238 hex = p->str[*m]; 1239 *m = *m + 1; 1240 if (isdigit(hex)) { 1241 value = value * 16 + (hex - '0'); 1242 } else if (isxdigit(hex)) { 1243 hex = tolower(hex); 1244 value = value * 16 + (hex - 'a' + 10); 1245 } else { 1246 *m = *m - 1; 1247 break; 1248 } 1249 } 1250 *c = value; 1251 #ifdef DEBUG 1252 (void) fprintf(stderr, "hex=%d\n", value); 1253 #endif 1254 *m = *m - 1; 1255 break; 1256 default : 1257 /* 1258 * Undefined by ANSI. 1259 * Just ignore "\". 1260 */ 1261 *c = p->str[*m]; 1262 break; 1263 } 1264 /* 1265 * Advance pointer to point the next character to be parsed. 1266 */ 1267 *m = *m + 1; 1268 return (p); 1269 } 1270 } /* get_next_ch */ 1271 1272 /* 1273 * Compares two msgids. 1274 * Comparison is done by values, not by characters represented. 1275 * For example, '\t', '\011' and '0x9' are identical values. 1276 * Return values are same as in strcmp. 1277 * 1 if msgid1 > msgid2 1278 * 0 if msgid1 = msgid2 1279 * -1 if msgid1 < msgid2 1280 */ 1281 static int 1282 msgidcmp(struct strlist_st *id1, struct strlist_st *id2) 1283 { 1284 char c1, c2; 1285 int m1, m2; 1286 1287 m1 = 0; 1288 m2 = 0; 1289 1290 /*CONSTCOND*/ 1291 while (1) { 1292 id1 = get_next_ch(id1, &m1, &c1); 1293 id2 = get_next_ch(id2, &m2, &c2); 1294 1295 if ((c1 == 0) && (c2 == 0)) { 1296 return (0); 1297 } 1298 1299 if (c1 > c2) { 1300 return (1); 1301 } else if (c1 < c2) { 1302 return (-1); 1303 } 1304 } 1305 /*NOTREACHED*/ 1306 } /* msgidcmp */ 1307 1308 /* 1309 * Check if a ANSI string (which is a linked list itself) is a duplicate 1310 * of any string in the list of ANSI string. 
1311 */ 1312 static int 1313 isduplicate(struct element_st *list, struct strlist_st *str) 1314 { 1315 struct element_st *p; 1316 1317 if (list == NULL) { 1318 return (FALSE); 1319 } 1320 1321 p = list; 1322 while (p != NULL) { 1323 if (p->msgid != NULL) { 1324 if (msgidcmp(p->msgid, str) == 0) { 1325 return (TRUE); 1326 } 1327 } 1328 p = p->next; 1329 } 1330 1331 return (FALSE); 1332 } /* isduplicate */ 1333 1334 /* 1335 * Extract a comment line and add to the linked list containing 1336 * comment block. 1337 * Each comment line is stored in the node. 1338 */ 1339 static void 1340 add_line_to_comment(void) 1341 { 1342 struct strlist_st *tmp_str; 1343 1344 tmp_str = new_strlist(); 1345 tmp_str->str = strdup(curr_line); 1346 tmp_str->next = NULL; 1347 1348 if (commhead == NULL) { 1349 /* Empty comment list */ 1350 commhead = tmp_str; 1351 commtail = tmp_str; 1352 } else { 1353 /* append it to the list */ 1354 commtail->next = tmp_str; 1355 commtail = commtail->next; 1356 } 1357 1358 is_last_comment_line = FALSE; 1359 } /* add_line_to_comment */ 1360 1361 /* 1362 * Add a double quoted string to the linked list containing ANSI string. 1363 */ 1364 static void 1365 add_qstring_to_str(void) 1366 { 1367 struct strlist_st *tmp_str; 1368 1369 tmp_str = new_strlist(); 1370 tmp_str->str = strdup(qstring_buf); 1371 tmp_str->next = NULL; 1372 1373 if (strhead == NULL) { 1374 /* Null ANSI string */ 1375 strhead = tmp_str; 1376 strtail = tmp_str; 1377 } else { 1378 /* Append it to the ANSI string linked list */ 1379 strtail->next = tmp_str; 1380 strtail = strtail->next; 1381 } 1382 1383 qstring_buf[0] = NULL; 1384 } /* add_qstring_to_str */ 1385 1386 /* 1387 * Finds the head of domain nodes given domain name. 1388 */ 1389 static struct domain_st * 1390 find_domain_node(char *dname) 1391 { 1392 struct domain_st *tmp_dom, *p; 1393 1394 /* 1395 * If -a option is specified everything will be written to the 1396 * default domain file. 
1397 */ 1398 if (aflg == TRUE) { 1399 if (def_dom == NULL) { 1400 def_dom = new_domain(); 1401 } 1402 return (def_dom); 1403 } 1404 1405 if ((dname == NULL) || 1406 (dname[0] == NULL) || 1407 (strcmp(dname, default_domain) == 0)) { 1408 if (def_dom == NULL) { 1409 def_dom = new_domain(); 1410 } 1411 if (strcmp(dname, default_domain) == 0) { 1412 (void) fprintf(stderr, 1413 "%s \"%s\" is used in dgettext of file:%s line:%d.\n", 1414 "Warning: default domain name", 1415 default_domain, curr_file, curr_linenum); 1416 } 1417 return (def_dom); 1418 } else { 1419 p = dom_head; 1420 while (p != NULL) { 1421 if (strcmp(p->dname, dname) == 0) { 1422 return (p); 1423 } 1424 p = p->next; 1425 } 1426 1427 tmp_dom = new_domain(); 1428 tmp_dom->dname = strdup(dname); 1429 1430 if (dom_head == NULL) { 1431 dom_head = tmp_dom; 1432 dom_tail = tmp_dom; 1433 } else { 1434 dom_tail->next = tmp_dom; 1435 dom_tail = dom_tail->next; 1436 } 1437 return (tmp_dom); 1438 } 1439 } /* find_domain_node */ 1440 1441 /* 1442 * Frees the ANSI string linked list. 1443 */ 1444 static void 1445 free_strlist(struct strlist_st *ptr) 1446 { 1447 struct strlist_st *p; 1448 1449 p = ptr; 1450 ptr = NULL; 1451 while (p != NULL) { 1452 ptr = p->next; 1453 free(p->str); 1454 free(p); 1455 p = ptr; 1456 } 1457 } /* free_strlist */ 1458 1459 /* 1460 * Finds if a ANSI string is contained in the exclude file. 1461 */ 1462 static int 1463 isexcluded(struct strlist_st *strlist) 1464 { 1465 struct exclude_st *p; 1466 1467 p = excl_head; 1468 while (p != NULL) { 1469 if (msgidcmp(p->exstr, strlist) == 0) { 1470 return (TRUE); 1471 } 1472 p = p->next; 1473 } 1474 return (FALSE); 1475 } /* isexcluded */ 1476 1477 /* 1478 * Finds if a comment block is to be extracted. 1479 * 1480 * When -c option is specified, find out if comment block contains 1481 * comment-tag as a token separated by blanks. If it does, this 1482 * comment block is associated with the next msgid encountered. 
1483 * Comment block is a linked list where each node contains one line 1484 * of comments. 1485 */ 1486 static int 1487 isextracted(struct strlist_st *strlist) 1488 { 1489 struct strlist_st *p; 1490 char *first, *pc; 1491 1492 1493 p = strlist; 1494 while (p != NULL) { 1495 first = strdup(p->str); 1496 while ((first != NULL) && (first[0] != NULL)) { 1497 pc = first; 1498 1499 /*CONSTCOND*/ 1500 while (1) { 1501 if (*pc == NULL) { 1502 break; 1503 } else if ((*pc == ' ') || (*pc == '\t')) { 1504 *pc++ = NULL; 1505 break; 1506 } 1507 pc++; 1508 } 1509 if (strcmp(first, comment_tag) == 0) { 1510 return (TRUE); 1511 } 1512 first = pc; 1513 } 1514 p = p->next; 1515 } /* while */ 1516 1517 /* 1518 * Not found. 1519 */ 1520 return (FALSE); 1521 } /* isextracted */ 1522 1523 /* 1524 * Adds ANSI string to the domain element list. 1525 */ 1526 static void 1527 add_str_to_element_list(int istextdomain, char *domain_list) 1528 { 1529 struct element_st *tmp_elem; 1530 struct element_st *p, *q; 1531 struct domain_st *tmp_dom; 1532 int result; 1533 1534 /* 1535 * This can happen if something like gettext(USAGE) is used 1536 * and it is impossible to get msgid for this gettext. 1537 * Since -x option should be used in this kind of cases, 1538 * it is OK not to catch msgid. 1539 */ 1540 if (strhead == NULL) { 1541 return; 1542 } 1543 1544 /* 1545 * The global variable curr_domain contains either NULL 1546 * for default_domain or domain name for dgettext(). 1547 */ 1548 tmp_dom = find_domain_node(domain_list); 1549 1550 /* 1551 * If this msgid is in the exclude file, 1552 * then free the linked list and return. 
1553 */ 1554 if ((istextdomain == FALSE) && 1555 (isexcluded(strhead) == TRUE)) { 1556 free_strlist(strhead); 1557 strhead = strtail = NULL; 1558 return; 1559 } 1560 1561 tmp_elem = new_element(); 1562 tmp_elem->msgid = strhead; 1563 tmp_elem->istextdomain = istextdomain; 1564 /* 1565 * If -c option is specified and TAG matches, 1566 * then associate the comment to the next [d]gettext() calls 1567 * encountered in the source code. 1568 * textdomain() calls will not have any effect. 1569 */ 1570 if (istextdomain == FALSE) { 1571 if ((cflg == TRUE) && (commhead != NULL)) { 1572 if (isextracted(commhead) == TRUE) { 1573 tmp_elem->comment = commhead; 1574 } else { 1575 free_strlist(commhead); 1576 } 1577 commhead = commtail = NULL; 1578 } 1579 } 1580 1581 tmp_elem->linenum = linenum_saved; 1582 tmp_elem->fname = strdup(curr_file); 1583 1584 1585 if (sflg == TRUE) { 1586 /* 1587 * If this is textdomain() call and -s option is specified, 1588 * append this node to the textdomain linked list. 1589 */ 1590 if (istextdomain == TRUE) { 1591 if (tmp_dom->textdomain_head == NULL) { 1592 tmp_dom->textdomain_head = tmp_elem; 1593 tmp_dom->textdomain_tail = tmp_elem; 1594 } else { 1595 tmp_dom->textdomain_tail->next = tmp_elem; 1596 tmp_dom->textdomain_tail = tmp_elem; 1597 } 1598 strhead = strtail = NULL; 1599 return; 1600 } 1601 1602 /* 1603 * Insert the node to the properly sorted position. 1604 */ 1605 q = NULL; 1606 p = tmp_dom->gettext_head; 1607 while (p != NULL) { 1608 result = msgidcmp(strhead, p->msgid); 1609 if (result == 0) { 1610 /* 1611 * Duplicate id. Do not store. 
1612 */ 1613 free_strlist(strhead); 1614 strhead = strtail = NULL; 1615 return; 1616 } else if (result > 0) { 1617 /* move to the next node */ 1618 q = p; 1619 p = p->next; 1620 } else { 1621 tmp_elem->next = p; 1622 if (q != NULL) { 1623 q->next = tmp_elem; 1624 } else { 1625 tmp_dom->gettext_head = tmp_elem; 1626 } 1627 strhead = strtail = NULL; 1628 return; 1629 } 1630 } /* while */ 1631 1632 /* 1633 * New msgid is the largest or empty list. 1634 */ 1635 if (q != NULL) { 1636 /* largest case */ 1637 q->next = tmp_elem; 1638 } else { 1639 /* empty list */ 1640 tmp_dom->gettext_head = tmp_elem; 1641 } 1642 } else { 1643 /* 1644 * Check if this msgid is already in the same domain. 1645 */ 1646 if (tmp_dom != NULL) { 1647 if (isduplicate(tmp_dom->gettext_head, 1648 tmp_elem->msgid) == TRUE) { 1649 tmp_elem->isduplicate = TRUE; 1650 } 1651 } 1652 /* 1653 * If -s option is not specified, then everything 1654 * is stored in gettext linked list. 1655 */ 1656 if (tmp_dom->gettext_head == NULL) { 1657 tmp_dom->gettext_head = tmp_elem; 1658 tmp_dom->gettext_tail = tmp_elem; 1659 } else { 1660 tmp_dom->gettext_tail->next = tmp_elem; 1661 tmp_dom->gettext_tail = tmp_elem; 1662 } 1663 } 1664 1665 strhead = strtail = NULL; 1666 } /* add_str_to_element_list */ 1667 1668 /* 1669 * Write all domain linked list to the files. 1670 */ 1671 static void 1672 write_all_files(void) 1673 { 1674 struct domain_st *tmp; 1675 1676 /* 1677 * Write out default domain file. 1678 */ 1679 write_one_file(def_dom); 1680 1681 /* 1682 * If dgettext() exists and -a option is not used, 1683 * then there are non-empty linked list. 1684 */ 1685 tmp = dom_head; 1686 while (tmp != NULL) { 1687 write_one_file(tmp); 1688 tmp = tmp->next; 1689 } 1690 } /* write_all_files */ 1691 1692 /* 1693 * add an element_st list to the linked list. 
1694 */ 1695 static void 1696 add_node_to_polist(struct element_st **pohead, 1697 struct element_st **potail, struct element_st *elem) 1698 { 1699 if (elem == NULL) { 1700 return; 1701 } 1702 1703 if (*pohead == NULL) { 1704 *pohead = *potail = elem; 1705 } else { 1706 (*potail)->next = elem; 1707 *potail = (*potail)->next; 1708 } 1709 } /* add_node_to_polist */ 1710 1711 #define INIT_STATE 0 1712 #define IN_MSGID 1 1713 #define IN_MSGSTR 2 1714 #define IN_COMMENT 3 1715 /* 1716 * Reads existing po file into the linked list and returns the head 1717 * of the linked list. 1718 */ 1719 static struct element_st * 1720 read_po(char *fname) 1721 { 1722 struct element_st *tmp_elem = NULL; 1723 struct element_st *ehead = NULL, *etail = NULL; 1724 struct strlist_st *comment_tail = NULL; 1725 struct strlist_st *msgid_tail = NULL; 1726 struct strlist_st *msgstr_tail = NULL; 1727 int state = INIT_STATE; 1728 char line [MAX_STRING_LEN]; 1729 FILE *fp; 1730 1731 if ((fp = fopen(fname, "r")) == NULL) { 1732 return (NULL); 1733 } 1734 1735 while (fgets(line, MAX_STRING_LEN, fp) != NULL) { 1736 /* 1737 * Line starting with # is a comment line and ignored. 1738 * Blank line is ignored, too. 1739 */ 1740 if (line[0] == '\n') { 1741 continue; 1742 } else if (line[0] == '#') { 1743 /* 1744 * If tmp_elem is not NULL, there is msgid pair 1745 * stored. Therefore, add it. 1746 */ 1747 if ((tmp_elem != NULL) && (state == IN_MSGSTR)) { 1748 add_node_to_polist(&ehead, &etail, tmp_elem); 1749 } 1750 1751 if ((state == INIT_STATE) || (state == IN_MSGSTR)) { 1752 state = IN_COMMENT; 1753 tmp_elem = new_element(); 1754 tmp_elem->comment = comment_tail = 1755 new_strlist(); 1756 /* 1757 * remove new line and skip "# " 1758 * in the beginning of the existing 1759 * comment line. 
1760 */ 1761 line[strlen(line)-1] = 0; 1762 comment_tail->str = strdup(line+2); 1763 } else if (state == IN_COMMENT) { 1764 comment_tail->next = new_strlist(); 1765 comment_tail = comment_tail->next; 1766 /* 1767 * remove new line and skip "# " 1768 * in the beginning of the existing 1769 * comment line. 1770 */ 1771 line[strlen(line)-1] = 0; 1772 comment_tail->str = strdup(line+2); 1773 } 1774 1775 } else if (strncmp(line, "domain", 6) == 0) { 1776 /* ignore domain line */ 1777 continue; 1778 } else if (strncmp(line, "msgid", 5) == 0) { 1779 if (state == IN_MSGSTR) { 1780 add_node_to_polist(&ehead, &etail, tmp_elem); 1781 tmp_elem = new_element(); 1782 } else if (state == INIT_STATE) { 1783 tmp_elem = new_element(); 1784 } 1785 1786 state = IN_MSGID; 1787 trim_line(line); 1788 tmp_elem->msgid = msgid_tail = new_strlist(); 1789 msgid_tail->str = strdup(line); 1790 1791 } else if (strncmp(line, "msgstr", 6) == 0) { 1792 state = IN_MSGSTR; 1793 trim_line(line); 1794 tmp_elem->msgstr = msgstr_tail = new_strlist(); 1795 msgstr_tail->str = strdup(line); 1796 } else { 1797 /* 1798 * If more than one line of string forms msgid, 1799 * append it to the string linked list. 1800 */ 1801 if (state == IN_MSGID) { 1802 trim_line(line); 1803 msgid_tail->next = new_strlist(); 1804 msgid_tail = msgid_tail->next; 1805 msgid_tail->str = strdup(line); 1806 } else if (state == IN_MSGSTR) { 1807 trim_line(line); 1808 msgstr_tail->next = new_strlist(); 1809 msgstr_tail = msgstr_tail->next; 1810 msgstr_tail->str = strdup(line); 1811 } 1812 } 1813 } /* while */ 1814 1815 /* 1816 * To insert the last msgid pair. 
1817 */ 1818 if (tmp_elem != NULL) { 1819 add_node_to_polist(&ehead, &etail, tmp_elem); 1820 } 1821 1822 #ifdef DEBUG 1823 { 1824 struct domain_st *tmp_domain = new_domain(); 1825 char tmpstr[256]; 1826 1827 sprintf(tmpstr, "existing_po file : <%s>", fname); 1828 tmp_domain->dname = strdup(tmpstr); 1829 tmp_domain->gettext_head = ehead; 1830 printf("======= existing po file <%s> ========\n", fname); 1831 print_one_domain(tmp_domain); 1832 } 1833 #endif /* DEBUG */ 1834 1835 (void) fclose(fp); 1836 return (ehead); 1837 } /* read_po */ 1838 1839 /* 1840 * This function will append the second list to the first list. 1841 * If the msgid in the second list contains msgid in the first list, 1842 * it will be marked as duplicate. 1843 */ 1844 static struct element_st * 1845 append_list(struct element_st *l1, struct element_st *l2) 1846 { 1847 struct element_st *p = NULL, *q = NULL, *l1_tail = NULL; 1848 1849 if (l1 == NULL) 1850 return (l2); 1851 if (l2 == NULL) 1852 return (l1); 1853 1854 /* 1855 * in this while loop, just mark isduplicate field of node in the 1856 * l2 list if the same msgid exists in l1 list. 1857 */ 1858 p = l2; 1859 while (p != NULL) { 1860 q = l1; 1861 while (q != NULL) { 1862 if (msgidcmp(p->msgid, q->msgid) == 0) { 1863 p->isduplicate = TRUE; 1864 break; 1865 } 1866 q = q->next; 1867 } 1868 p = p->next; 1869 } 1870 1871 /* Now connect two linked lists. */ 1872 l1_tail = l1; 1873 while (l1_tail->next != NULL) { 1874 if (l1->next == NULL) 1875 break; 1876 l1_tail = l1_tail-> next; 1877 } 1878 l1_tail->next = l2; 1879 1880 return (l1); 1881 } /* append_list */ 1882 1883 /* 1884 * Writes one domain list to the file. 
1885 */ 1886 static void 1887 write_one_file(struct domain_st *head) 1888 { 1889 FILE *fp; 1890 char fname [MAX_PATH_LEN]; 1891 char dname [MAX_DOMAIN_LEN]; 1892 struct element_st *p; 1893 struct element_st *existing_po_list; 1894 1895 /* 1896 * If head is NULL, then it still has to create .po file 1897 * so that it will guarantee that the previous .po file was 1898 * alwasys deleted. 1899 * This is why checking NULL pointer has been moved to after 1900 * creating .po file. 1901 */ 1902 1903 /* 1904 * If domain name is NULL, it is the default domain list. 1905 * The domain name is either "messages" or specified by option -d. 1906 * The default domain name is contained in default_domain variable. 1907 */ 1908 dname[0] = NULL; 1909 if ((head != NULL) && 1910 (head->dname != NULL)) { 1911 (void) strcpy(dname, head->dname); 1912 } else { 1913 (void) strcpy(dname, default_domain); 1914 } 1915 1916 /* 1917 * path is the current directory if not specified by option -p. 1918 */ 1919 fname[0] = 0; 1920 if (pflg == TRUE) { 1921 (void) strcat(fname, pathname); 1922 (void) strcat(fname, "/"); 1923 } 1924 (void) strcat(fname, dname); 1925 (void) strcat(fname, ".po"); 1926 1927 /* 1928 * If -j flag is specified, read exsiting .po file and 1929 * append the current list to the end of the list read from 1930 * the existing .po file. 1931 */ 1932 if (jflg == TRUE) { 1933 /* 1934 * If head is NULL, we don't have to change existing file. 1935 * Therefore, just return it. 
1936 */ 1937 if (head == NULL) { 1938 return; 1939 } 1940 existing_po_list = read_po(fname); 1941 head->gettext_head = append_list(existing_po_list, 1942 head->gettext_head); 1943 #ifdef DEBUG 1944 if (head->dname != NULL) { 1945 printf("===after merge (-j option): <%s>===\n", 1946 head->dname); 1947 } else { 1948 printf("===after merge (-j option): <NULL>===\n"); 1949 } 1950 print_one_domain(head); 1951 #endif 1952 1953 } /* if jflg */ 1954 1955 if ((fp = fopen(fname, "w")) == NULL) { 1956 (void) fprintf(stderr, 1957 "ERROR, can't open output file: %s\n", fname); 1958 exit(2); 1959 } 1960 1961 (void) fprintf(fp, "domain \"%s\"\n", dname); 1962 1963 /* See comments above in the beginning of this function */ 1964 if (head == NULL) 1965 return; 1966 1967 /* 1968 * There are separate storage for textdomain() calls if 1969 * -s option is used (textdomain_head linked list). 1970 * Otherwise, textdomain() is mixed with gettext(0 and dgettext(). 1971 * If mixed, the boolean varaible istextdomain is used to see 1972 * if the current node contains textdomain() or [d]gettext(). 1973 */ 1974 if (sflg == TRUE) { 1975 p = head->textdomain_head; 1976 while (p != NULL) { 1977 /* 1978 * textdomain output line already contains 1979 * FIle name and line number information. 1980 * Therefore, does not have to check for nflg. 1981 */ 1982 output_textdomain(fp, p); 1983 p = p->next; 1984 } 1985 } 1986 1987 p = head->gettext_head; 1988 while (p != NULL) { 1989 1990 /* 1991 * Comment is printed only if -c is used and 1992 * associated with gettext or dgettext. 1993 * textdomain is not associated with comments. 1994 * Changes: 1995 * comments should be extracted in case of -j option 1996 * because there are read from exising file. 1997 */ 1998 if (((cflg == TRUE) || (jflg == TRUE)) && 1999 (p->istextdomain != TRUE)) { 2000 output_comment(fp, p->comment); 2001 } 2002 2003 /* 2004 * If -n is used, then file number and line number 2005 * information is printed. 
2006 * In case of textdomain(), this information is redundant 2007 * and is not printed. 2008 * If linenum is 0, it means this information has been 2009 * read from existing po file and it already contains 2010 * file and line number info as a comment line. So, it 2011 * should not printed in such case. 2012 */ 2013 if ((nflg == TRUE) && (p->istextdomain == FALSE) && 2014 (p->linenum > 0)) { 2015 (void) fprintf(fp, "# File:%s, line:%d\n", 2016 p->fname, p->linenum); 2017 } 2018 2019 /* 2020 * Depending on the type of node, output textdomain comment 2021 * or msgid. 2022 */ 2023 if ((sflg == FALSE) && 2024 (p->istextdomain == TRUE)) { 2025 output_textdomain(fp, p); 2026 } else { 2027 output_msgid(fp, p->msgid, p->isduplicate); 2028 } 2029 p = p->next; 2030 2031 } /* while */ 2032 2033 (void) fclose(fp); 2034 } /* write_one_file */ 2035 2036 /* 2037 * Prints out textdomain call as a comment line with file name and 2038 * the line number information. 2039 */ 2040 static void 2041 output_textdomain(FILE *fp, struct element_st *p) 2042 { 2043 2044 if (p == NULL) 2045 return; 2046 2047 /* 2048 * Write textdomain() line as a comment. 2049 */ 2050 (void) fprintf(fp, "# File:%s, line:%d, textdomain(\"%s\");\n", 2051 p->fname, p->linenum, p->msgid->str); 2052 } /* output_textdomain */ 2053 2054 /* 2055 * Prints out comments from linked list. 2056 */ 2057 static void 2058 output_comment(FILE *fp, struct strlist_st *p) 2059 { 2060 if (p == NULL) 2061 return; 2062 2063 /* 2064 * Write comment section. 2065 */ 2066 while (p != NULL) { 2067 (void) fprintf(fp, "# %s\n", p->str); 2068 p = p->next; 2069 } 2070 } /* output_comment */ 2071 2072 /* 2073 * Prints out msgid along with msgstr. 2074 */ 2075 static void 2076 output_msgid(FILE *fp, struct strlist_st *p, int duplicate) 2077 { 2078 struct strlist_st *q; 2079 2080 if (p == NULL) 2081 return; 2082 2083 /* 2084 * Write msgid section. 
2085 * If duplciate flag is ON, prepend "# " in front of every line 2086 * so that they are considered as comment lines in .po file. 2087 */ 2088 if (duplicate == TRUE) { 2089 (void) fprintf(fp, "# "); 2090 } 2091 (void) fprintf(fp, "msgid \"%s\"\n", p->str); 2092 q = p->next; 2093 while (q != NULL) { 2094 if (duplicate == TRUE) { 2095 (void) fprintf(fp, "# "); 2096 } 2097 (void) fprintf(fp, " \"%s\"\n", q->str); 2098 q = q->next; 2099 } 2100 2101 /* 2102 * Write msgstr section. 2103 * if -M option is specified, append <suffix> to msgid. 2104 * if -m option is specified, prepend <prefix> to msgid. 2105 */ 2106 if (duplicate == TRUE) { 2107 (void) fprintf(fp, "# "); 2108 } 2109 if ((mflg == TRUE) || (Mflg == TRUE)) { 2110 if (mflg == TRUE) { 2111 /* 2112 * If single line msgid, add suffix to the same line 2113 */ 2114 if ((Mflg == TRUE) && (p->next == NULL)) { 2115 /* -M and -m and single line case */ 2116 (void) fprintf(fp, 2117 "msgstr \"%s%s%s\"\n", 2118 prefix, p->str, suffix); 2119 } else { 2120 /* -M and -m and multi line case */ 2121 (void) fprintf(fp, 2122 "msgstr \"%s%s\"\n", 2123 prefix, p->str); 2124 } 2125 } else { 2126 if ((Mflg == TRUE) && (p->next == NULL)) { 2127 /* -M only with single line case */ 2128 (void) fprintf(fp, "msgstr \"%s%s\"\n", 2129 p->str, suffix); 2130 } else { 2131 /* -M only with multi line case */ 2132 (void) fprintf(fp, "msgstr \"%s\"\n", p->str); 2133 } 2134 } 2135 q = p->next; 2136 while (q != NULL) { 2137 if (duplicate == TRUE) { 2138 (void) fprintf(fp, "# "); 2139 } 2140 (void) fprintf(fp, " \"%s\"\n", q->str); 2141 q = q->next; 2142 } 2143 /* 2144 * If multi line msgid, add suffix after the last line. 2145 */ 2146 if ((Mflg == TRUE) && (p->next != NULL) && 2147 (suffix[0] != NULL)) { 2148 (void) fprintf(fp, " \"%s\"\n", suffix); 2149 } 2150 } else { 2151 (void) fprintf(fp, "msgstr\n"); 2152 } 2153 } /* output_msgid */ 2154 2155 /* 2156 * Malloc a new element node and initialize fields. 
2157 */ 2158 static struct element_st * 2159 new_element(void) 2160 { 2161 struct element_st *tmp; 2162 2163 tmp = (struct element_st *)malloc(sizeof (struct element_st)); 2164 tmp->istextdomain = FALSE; 2165 tmp->isduplicate = FALSE; 2166 tmp->msgid = NULL; 2167 tmp->msgstr = NULL; 2168 tmp->comment = NULL; 2169 tmp->fname = NULL; 2170 tmp->linenum = 0; 2171 tmp->next = NULL; 2172 2173 return (tmp); 2174 } /* new_element */ 2175 2176 /* 2177 * Malloc a new domain node and initialize fields. 2178 */ 2179 static struct domain_st * 2180 new_domain(void) 2181 { 2182 struct domain_st *tmp; 2183 2184 tmp = (struct domain_st *)malloc(sizeof (struct domain_st)); 2185 tmp->dname = NULL; 2186 tmp->gettext_head = NULL; 2187 tmp->gettext_tail = NULL; 2188 tmp->textdomain_head = NULL; 2189 tmp->textdomain_tail = NULL; 2190 tmp->next = NULL; 2191 2192 return (tmp); 2193 } /* new_domain */ 2194 2195 /* 2196 * Malloc a new string list node and initialize fields. 2197 */ 2198 static struct strlist_st * 2199 new_strlist(void) 2200 { 2201 struct strlist_st *tmp; 2202 2203 tmp = (struct strlist_st *)malloc(sizeof (struct strlist_st)); 2204 tmp->str = NULL; 2205 tmp->next = NULL; 2206 2207 return (tmp); 2208 } /* new_strlist */ 2209 2210 /* 2211 * Malloc a new exclude string list node and initialize fields. 2212 */ 2213 static struct exclude_st * 2214 new_exclude(void) 2215 { 2216 struct exclude_st *tmp; 2217 2218 tmp = (struct exclude_st *)malloc(sizeof (struct exclude_st)); 2219 tmp->exstr = NULL; 2220 tmp->next = NULL; 2221 2222 return (tmp); 2223 } /* new_exclude */ 2224 2225 /* 2226 * Local version of strcat to keep within maximum string size. 
2227 */ 2228 static void 2229 lstrcat(char *s1, const char *s2) 2230 { 2231 char *es1 = &s1[MAX_STRING_LEN]; 2232 char *ss1 = s1; 2233 2234 while (*s1++) 2235 ; 2236 --s1; 2237 while (*s1++ = *s2++) 2238 if (s1 >= es1) { 2239 s1[-1] = '\0'; 2240 if ((in_comment == TRUE || in_quote == TRUE) && 2241 (warn_linenum != curr_linenum)) { 2242 if (stdin_only == FALSE) { 2243 (void) fprintf(stderr, 2244 "WARNING: file %s line %d exceeds "\ 2245 "%d characters: \"%15.15s\"\n", 2246 curr_file, curr_linenum, 2247 MAX_STRING_LEN, ss1); 2248 } else { 2249 (void) fprintf(stderr, 2250 "WARNING: line %d exceeds "\ 2251 "%d characters: \"%15.15s\"\n", 2252 curr_linenum, MAX_STRING_LEN, ss1); 2253 } 2254 warn_linenum = curr_linenum; 2255 } 2256 break; 2257 } 2258 } /* lstrcat */ 2259 2260 #ifdef DEBUG 2261 /* 2262 * Debug print routine. Compiled only with DEBUG on. 2263 */ 2264 void 2265 print_element_list(struct element_st *q) 2266 { 2267 struct strlist_st *r; 2268 2269 while (q != NULL) { 2270 printf(" istextdomain = %d\n", q->istextdomain); 2271 printf(" isduplicate = %d\n", q->isduplicate); 2272 if ((q->msgid != NULL) && (q->msgid->str != NULL)) { 2273 printf(" msgid = <%s>\n", q->msgid->str); 2274 r = q->msgid->next; 2275 while (r != NULL) { 2276 printf(" <%s>\n", r->str); 2277 r = r->next; 2278 } 2279 } else { 2280 printf(" msgid = <NULL>\n"); 2281 } 2282 if ((q->msgstr != NULL) && (q->msgstr->str != NULL)) { 2283 printf(" msgstr= <%s>\n", q->msgstr->str); 2284 r = q->msgstr->next; 2285 while (r != NULL) { 2286 printf(" <%s>\n", r->str); 2287 r = r->next; 2288 } 2289 } else { 2290 printf(" msgstr= <NULL>\n"); 2291 } 2292 2293 if (q->comment == NULL) { 2294 printf(" comment = <NULL>\n"); 2295 } else { 2296 printf(" comment = <%s>\n", q->comment->str); 2297 r = q->comment->next; 2298 while (r != NULL) { 2299 printf(" <%s>\n", r->str); 2300 r = r->next; 2301 } 2302 } 2303 2304 if (q->fname == NULL) { 2305 printf(" fname = <NULL>\n"); 2306 } else { 2307 printf(" fname = <%s>\n", 
q->fname); 2308 } 2309 printf(" linenum = %d\n", q->linenum); 2310 printf("\n"); 2311 q = q->next; 2312 } 2313 } 2314 2315 /* 2316 * Debug print routine. Compiled only with DEBUG on. 2317 */ 2318 void 2319 print_one_domain(struct domain_st *p) 2320 { 2321 struct element_st *q; 2322 2323 if (p == NULL) { 2324 printf("domain pointer = <NULL>\n"); 2325 return; 2326 } else if (p->dname == NULL) { 2327 printf("domain_name = <%s>\n", "<NULL>"); 2328 } else { 2329 printf("domain_name = <%s>\n", p->dname); 2330 } 2331 q = p->gettext_head; 2332 print_element_list(q); 2333 2334 q = p->textdomain_head; 2335 print_element_list(q); 2336 } /* print_one_domain */ 2337 2338 void 2339 print_all_domain(struct domain_st *dom_list) 2340 { 2341 struct domain_st *p; 2342 struct element_st *q; 2343 2344 p = dom_list; 2345 while (p != NULL) { 2346 print_one_domain(p); 2347 p = p->next; 2348 } /* while */ 2349 } /* print_all_domain */ 2350 #endif 2351