/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1991, 1999, 2001-2002 Sun Microsystems, Inc.
 * All rights reserved.
 * Use is subject to license terms.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	TRUE	1
#define	FALSE	0
#define	MAX_PATH_LEN	1024
#define	MAX_DOMAIN_LEN	1024
#define	MAX_STRING_LEN	2048

/*
 * NOTE(review): the whitespace inside this string literal (in particular at
 * the start of each continuation line) is reproduced as it appears in the
 * available text -- confirm against the pristine source.
 */
#define	USAGE	"Usage: xgettext [-a [-x exclude-file]] [-jns]\
[-c comment-tag]\n [-d default-domain] [-m prefix] \
[-M suffix] [-p pathname] files ...\n\
xgettext -h\n"

#define	DEFAULT_DOMAIN	"messages"

extern char	yytext[];
extern int	yylex(void);

/*
 * A list of strings used to store an ANSI-C style string.
 * Each quoted string is stored in one node.
 */
struct strlist_st {
	char			*str;
	struct strlist_st	*next;
};

/*
 * istextdomain	: Boolean telling if this node contains a textdomain call.
 * isduplicate	: Boolean telling if this node duplicates any other msgid.
 * msgid	: contains msgid, or textdomain if istextdomain is true.
 * msgstr	: contains msgstr.
 * comment	: comment extracted in case of -c option.
 * fname	: tells which file contains msgid.
 * linenum	: line number in the file.
 * next		: Next node.
 */
struct element_st {
	char			istextdomain;
	char			isduplicate;
	struct strlist_st	*msgid;
	struct strlist_st	*msgstr;
	struct strlist_st	*comment;
	char			*fname;
	int			linenum;
	struct element_st	*next;
};

/*
 * dname	   : domain name. NULL if default domain.
 * gettext_head	   : Head of linked list containing [d]gettext().
 * gettext_tail	   : Tail of linked list containing [d]gettext().
 * textdomain_head : Head of linked list containing textdomain().
 * textdomain_tail : Tail of linked list containing textdomain().
 * next		   : Next node.
 *
 * Each domain contains two linked lists.
 *	(gettext_head, textdomain_head)
 * If -s option is used, then textdomain_head contains all
 * textdomain() calls and no textdomain() calls are stored in gettext_head.
 * If -s option is not used, textdomain_head is an empty list and
 * gettext_head contains all gettext(), dgettext(), and textdomain() calls.
 */
struct domain_st {
	char			*dname;
	struct element_st	*gettext_head;
	struct element_st	*gettext_tail;
	struct element_st	*textdomain_head;
	struct element_st	*textdomain_tail;
	struct domain_st	*next;
};

/*
 * There are two domain linked lists.
 * def_dom contains the default domain linked list and
 * dom_head contains all other domain linked lists to be created by
 * dgettext() calls.
 */
static struct domain_st	*def_dom = NULL;
static struct domain_st	*dom_head = NULL;
static struct domain_st	*dom_tail = NULL;

/*
 * This linked list contains a list of strings to be excluded when
 * -x option is used.
 */
static struct exclude_st {
	struct strlist_st	*exstr;
	struct exclude_st	*next;
} *excl_head;

/*
 * All option flags and values for each option if any.
 */
static int	aflg = FALSE;
static int	cflg = FALSE;
static char	*comment_tag = NULL;
static char	*default_domain = NULL;
static int	hflg = FALSE;
static int	jflg = FALSE;
static int	mflg = FALSE;
static int	Mflg = FALSE;
static char	*suffix = NULL;
static char	*prefix = NULL;
static int	nflg = FALSE;
static int	pflg = FALSE;
static char	*pathname = NULL;
static int	sflg = FALSE;
static int	tflg = FALSE;	/* Undocumented option to extract dcgettext */
static int	xflg = FALSE;
static char	*exclude_file = NULL;

/*
 * Each variable shows the current state of parsing the input file.
 *
 * in_comment		: Means inside comment block (C or C++).
 * in_cplus_comment	: Means inside C++ comment block.
 * in_gettext		: Means inside gettext call.
 * in_dgettext		: Means inside dgettext call.
 * in_dcgettext		: Means inside dcgettext call.
 * in_textdomain	: Means inside textdomain call.
 * in_str		: Means currently processing ANSI style string.
 * in_quote		: Means currently processing double quoted string.
 * in_skippable_string	: Means currently processing double quoted string,
 *			that occurs outside a call to gettext, dgettext,
 *			dcgettext, textdomain, with -a not specified.
 * is_last_comment_line	: Means the current line is the last line
 *			of the comment block. This is necessary because
 *			in_comment becomes FALSE when '* /' is encountered.
 * is_first_comma_found	: This is used only for dcgettext because dcgettext()
 *			requires 2 commas. So a different action is needed
 *			depending on which comma is encountered.
 * num_nested_open_paren : This keeps track of the number of open parens to
 *			handle dcgettext ((const char *)0,"msg",LC_TIME);
 */
static int	in_comment = FALSE;
static int	in_cplus_comment = FALSE;
static int	in_gettext = FALSE;
static int	in_dgettext = FALSE;
static int	in_dcgettext = FALSE;
static int	in_textdomain = FALSE;
static int	in_str = FALSE;
static int	in_quote = FALSE;
static int	is_last_comment_line = FALSE;
static int	is_first_comma_found = FALSE;
static int	in_skippable_string = FALSE;
static int	num_nested_open_paren = 0;

/*
 * This variable contains the first line of gettext(), dgettext(), or
 * textdomain() calls.
 * This is necessary for multiple lines of a single call to store
 * the starting line.
 */
static int	linenum_saved = 0;

int	stdin_only = FALSE;	/* Read input from stdin */

/*
 * curr_file	: Contains current file name processed.
 * curr_domain	: Contains the current domain for each dgettext().
 *		  This is NULL for gettext().
 * curr_line	: Contains the current line processed.
 * qstring_buf	: Contains the double quoted string processed.
 * curr_linenum	: Line number being processed in the current input file.
 * warn_linenum	: Line number of current warning message.
 */
char		curr_file[MAX_PATH_LEN];
static char	curr_domain[MAX_DOMAIN_LEN];
static char	curr_line[MAX_STRING_LEN];
static char	qstring_buf[MAX_STRING_LEN];
int		curr_linenum = 1;
int		warn_linenum = 0;

/*
 * strhead  : This list contains ANSI style string.
 *		Each node contains double quoted string.
 * strtail  : This is the tail of strhead.
 * commhead : This list contains comments string.
 *		Each node contains one line of comment.
 * commtail : This is the tail of commhead.
 */
static struct strlist_st	*strhead = NULL;
static struct strlist_st	*strtail = NULL;
static struct strlist_st	*commhead = NULL;
static struct strlist_st	*commtail = NULL;

/*
 * gargc : Same as argc. Used to pass argc to lex routine.
 * gargv : Same as argv. Used to pass argv to lex routine.
 */
int	gargc;
char	**gargv;

static void add_line_to_comment(void);
static void add_qstring_to_str(void);
static void add_str_to_element_list(int, char *);
static void copy_strlist_to_str(char *, struct strlist_st *);
static void end_ansi_string(void);
static void free_strlist(struct strlist_st *);
void handle_newline(void);
static void initialize_globals(void);
static void output_comment(FILE *, struct strlist_st *);
static void output_msgid(FILE *, struct strlist_st *, int);
static void output_textdomain(FILE *, struct element_st *);
static void print_help(void);
static void read_exclude_file(void);
static void trim_line(char *);
static void write_all_files(void);
static void write_one_file(struct domain_st *);

static void lstrcat(char *, const char *);

/*
 * Utility functions to malloc a node and initialize fields.
 */
static struct domain_st *new_domain(void);
static struct strlist_st *new_strlist(void);
static struct element_st *new_element(void);
static struct exclude_st *new_exclude(void);

/*
 * Main program of xgettext.
254 */ 255 int 256 main(int argc, char **argv) 257 { 258 int opterr = FALSE; 259 int c; 260 261 initialize_globals(); 262 263 while ((c = getopt(argc, argv, "jhax:nsc:d:m:M:p:t")) != EOF) { 264 switch (c) { 265 case 'a': 266 aflg = TRUE; 267 break; 268 case 'c': 269 cflg = TRUE; 270 comment_tag = optarg; 271 break; 272 case 'd': 273 default_domain = optarg; 274 break; 275 case 'h': 276 hflg = TRUE; 277 break; 278 case 'j': 279 jflg = TRUE; 280 break; 281 case 'M': 282 Mflg = TRUE; 283 suffix = optarg; 284 break; 285 case 'm': 286 mflg = TRUE; 287 prefix = optarg; 288 break; 289 case 'n': 290 nflg = TRUE; 291 break; 292 case 'p': 293 pflg = TRUE; 294 pathname = optarg; 295 break; 296 case 's': 297 sflg = TRUE; 298 break; 299 case 't': 300 tflg = TRUE; 301 break; 302 case 'x': 303 xflg = TRUE; 304 exclude_file = optarg; 305 break; 306 case '?': 307 opterr = TRUE; 308 break; 309 } 310 } 311 312 /* if -h is used, ignore all other options. */ 313 if (hflg == TRUE) { 314 (void) fprintf(stderr, USAGE); 315 print_help(); 316 exit(0); 317 } 318 319 /* -x can be used only with -a */ 320 if ((xflg == TRUE) && (aflg == FALSE)) 321 opterr = TRUE; 322 323 /* -j cannot be used with -a */ 324 if ((jflg == TRUE) && (aflg == TRUE)) { 325 (void) fprintf(stderr, 326 "-a and -j options cannot be used together.\n"); 327 opterr = TRUE; 328 } 329 330 /* -j cannot be used with -s */ 331 if ((jflg == TRUE) && (sflg == TRUE)) { 332 (void) fprintf(stderr, 333 "-j and -s options cannot be used together.\n"); 334 opterr = TRUE; 335 } 336 337 if (opterr == TRUE) { 338 (void) fprintf(stderr, USAGE); 339 exit(2); 340 } 341 342 /* error, if no files are specified. 
*/ 343 if (optind == argc) { 344 (void) fprintf(stderr, USAGE); 345 exit(2); 346 } 347 348 if (xflg == TRUE) { 349 read_exclude_file(); 350 } 351 352 /* If files are -, then read from stdin */ 353 if (argv[optind][0] == '-') { 354 stdin_only = TRUE; 355 optind++; 356 } else { 357 stdin_only = FALSE; 358 } 359 360 /* Store argc and argv to pass to yylex() */ 361 gargc = argc; 362 gargv = argv; 363 364 #ifdef DEBUG 365 (void) printf("optind=%d\n", optind); 366 { 367 int i = optind; 368 for (; i < argc; i++) { 369 (void) printf(" %d, <%s>\n", i, argv[i]); 370 } 371 } 372 #endif 373 374 if (stdin_only == FALSE) { 375 if (freopen(argv[optind], "r", stdin) == NULL) { 376 (void) fprintf(stderr, 377 "ERROR, can't open input file: %s\n", argv[optind]); 378 exit(2); 379 } 380 (void) strcpy(curr_file, gargv[optind]); 381 optind++; 382 } 383 384 /* 385 * Process input. 386 */ 387 (void) yylex(); 388 389 #ifdef DEBUG 390 printf("\n======= default_domain ========\n"); 391 print_one_domain(def_dom); 392 printf("======= domain list ========\n"); 393 print_all_domain(dom_head); 394 #endif 395 396 /* 397 * Write out all .po files. 398 */ 399 write_all_files(); 400 401 return (0); 402 } /* main */ 403 404 /* 405 * Prints help information for each option. 
406 */ 407 static void 408 print_help(void) 409 { 410 (void) fprintf(stderr, "\n"); 411 (void) fprintf(stderr, "-a\t\t\tfind ALL strings\n"); 412 (void) fprintf(stderr, 413 "-c <comment-tag>\tget comments containing <flag>\n"); 414 (void) fprintf(stderr, 415 "-d <default-domain>\tuse <default-domain> for default domain\n"); 416 (void) fprintf(stderr, "-h\t\t\tHelp\n"); 417 (void) fprintf(stderr, 418 "-j\t\t\tupdate existing file with the current result\n"); 419 (void) fprintf(stderr, 420 "-M <suffix>\t\tfill in msgstr with msgid<suffix>\n"); 421 (void) fprintf(stderr, 422 "-m <prefix>\t\tfill in msgstr with <prefix>msgid\n"); 423 (void) fprintf(stderr, 424 "-n\t\t\tline# file name and line number info in output\n"); 425 (void) fprintf(stderr, 426 "-p <pathname>\t\tuse <pathname> for output file directory\n"); 427 (void) fprintf(stderr, 428 "-s\t\t\tgenerate sorted output files\n"); 429 (void) fprintf(stderr, "-x <exclude-file>\texclude strings in file " 430 "<exclude-file> from output\n"); 431 (void) fprintf(stderr, 432 "-\t\t\tread stdin, use as a filter (input only)\n"); 433 } /* print_help */ 434 435 /* 436 * Extract file name and line number information from macro line 437 * and set the global variable accordingly. 438 * The valid line format is 439 * 1) # nnn 440 * or 441 * 2) # nnn "xxxxx" 442 * where nnn is line number and xxxxx is file name. 443 */ 444 static void 445 extract_filename_linenumber(char *mline) 446 { 447 int num; 448 char *p, *q, *r; 449 450 /* 451 * mline can contain multi newline. 452 * line number should be increased by the number of newlines. 
453 */ 454 p = mline; 455 while ((p = strchr(p, '\n')) != NULL) { 456 p++; 457 curr_linenum++; 458 } 459 p = strchr(mline, ' '); 460 if (p == NULL) 461 return; 462 q = strchr(++p, ' '); 463 if (q == NULL) { 464 /* case 1 */ 465 if ((num = atoi(p)) > 0) { 466 curr_linenum = num; 467 return; 468 } 469 } else { 470 /* case 2 */ 471 *q++ = 0; 472 if (*q == '"') { 473 q++; 474 r = strchr(q, '"'); 475 if (r == NULL) { 476 return; 477 } 478 *r = 0; 479 if ((num = atoi(p)) > 0) { 480 curr_linenum = num; 481 (void) strcpy(curr_file, q); 482 } 483 } 484 } 485 } /* extract_filename_linenumber */ 486 487 /* 488 * Handler for MACRO line which starts with #. 489 */ 490 void 491 handle_macro_line(void) 492 { 493 #ifdef DEBUG 494 (void) printf("Macro line=<%s>\n", yytext); 495 #endif 496 if (cflg == TRUE) 497 lstrcat(curr_line, yytext); 498 499 if (in_quote == TRUE) { 500 lstrcat(qstring_buf, yytext); 501 } else if (in_comment == FALSE) { 502 extract_filename_linenumber(yytext); 503 } 504 505 curr_linenum--; 506 handle_newline(); 507 } /* handle_macro_line */ 508 509 /* 510 * Handler for C++ comments which starts with //. 511 */ 512 void 513 handle_cplus_comment_line(void) 514 { 515 if (cflg == TRUE) 516 lstrcat(curr_line, yytext); 517 518 if (in_quote == TRUE) { 519 lstrcat(qstring_buf, yytext); 520 } else if ((in_comment == FALSE) && 521 (in_skippable_string == FALSE)) { 522 523 /* 524 * If already in c comments, don't do anything. 525 * Set both flags to TRUE here. 526 * Both flags will be set to FALSE when newline 527 * encounters. 528 */ 529 in_cplus_comment = TRUE; 530 in_comment = TRUE; 531 } 532 } /* handle_cplus_comment_line */ 533 534 /* 535 * Handler for the comment start (slash asterisk) in input file. 
536 */ 537 void 538 handle_open_comment(void) 539 { 540 if (cflg == TRUE) 541 lstrcat(curr_line, yytext); 542 543 if (in_quote == TRUE) { 544 lstrcat(qstring_buf, yytext); 545 } else if ((in_comment == FALSE) && 546 (in_skippable_string == FALSE)) { 547 548 in_comment = TRUE; 549 is_last_comment_line = FALSE; 550 /* 551 * If there is any comment extracted before accidently, 552 * clean it up and start the new comment again. 553 */ 554 free_strlist(commhead); 555 commhead = commtail = NULL; 556 } 557 } 558 559 /* 560 * Handler for the comment end (asterisk slash) in input file. 561 */ 562 void 563 handle_close_comment(void) 564 { 565 if (cflg == TRUE) 566 lstrcat(curr_line, yytext); 567 568 if (in_quote == TRUE) { 569 lstrcat(qstring_buf, yytext); 570 } else if (in_skippable_string == FALSE) { 571 in_comment = FALSE; 572 is_last_comment_line = TRUE; 573 } 574 } 575 576 /* 577 * Handler for "gettext" in input file. 578 */ 579 void 580 handle_gettext(void) 581 { 582 /* 583 * If -t option is specified to extrct dcgettext, 584 * don't do anything for gettext(). 585 */ 586 if (tflg == TRUE) { 587 return; 588 } 589 590 num_nested_open_paren = 0; 591 592 if (cflg == TRUE) 593 lstrcat(curr_line, yytext); 594 595 if (in_quote == TRUE) { 596 lstrcat(qstring_buf, yytext); 597 } else if (in_comment == FALSE) { 598 in_gettext = TRUE; 599 linenum_saved = curr_linenum; 600 /* 601 * gettext will be put into default domain .po file 602 * curr_domain does not change for gettext. 603 */ 604 curr_domain[0] = '\0'; 605 } 606 } /* handle_gettext */ 607 608 /* 609 * Handler for "dgettext" in input file. 610 */ 611 void 612 handle_dgettext(void) 613 { 614 /* 615 * If -t option is specified to extrct dcgettext, 616 * don't do anything for dgettext(). 
617 */ 618 if (tflg == TRUE) { 619 return; 620 } 621 622 num_nested_open_paren = 0; 623 624 if (cflg == TRUE) 625 lstrcat(curr_line, yytext); 626 627 if (in_quote == TRUE) { 628 lstrcat(qstring_buf, yytext); 629 } else if (in_comment == FALSE) { 630 in_dgettext = TRUE; 631 linenum_saved = curr_linenum; 632 /* 633 * dgettext will be put into domain file specified. 634 * curr_domain will follow. 635 */ 636 curr_domain[0] = '\0'; 637 } 638 } /* handle_dgettext */ 639 640 /* 641 * Handler for "dcgettext" in input file. 642 */ 643 void 644 handle_dcgettext(void) 645 { 646 /* 647 * dcgettext will be extracted only when -t flag is specified. 648 */ 649 if (tflg == FALSE) { 650 return; 651 } 652 653 num_nested_open_paren = 0; 654 655 is_first_comma_found = FALSE; 656 657 if (cflg == TRUE) 658 lstrcat(curr_line, yytext); 659 660 if (in_quote == TRUE) { 661 lstrcat(qstring_buf, yytext); 662 } else if (in_comment == FALSE) { 663 in_dcgettext = TRUE; 664 linenum_saved = curr_linenum; 665 /* 666 * dcgettext will be put into domain file specified. 667 * curr_domain will follow. 668 */ 669 curr_domain[0] = '\0'; 670 } 671 } /* handle_dcgettext */ 672 673 /* 674 * Handler for "textdomain" in input file. 675 */ 676 void 677 handle_textdomain(void) 678 { 679 if (cflg == TRUE) 680 lstrcat(curr_line, yytext); 681 682 if (in_quote == TRUE) { 683 lstrcat(qstring_buf, yytext); 684 } else if (in_comment == FALSE) { 685 in_textdomain = TRUE; 686 linenum_saved = curr_linenum; 687 curr_domain[0] = '\0'; 688 } 689 } /* handle_textdomain */ 690 691 /* 692 * Handler for '(' in input file. 
693 */ 694 void 695 handle_open_paren(void) 696 { 697 if (cflg == TRUE) 698 lstrcat(curr_line, yytext); 699 700 if (in_quote == TRUE) { 701 lstrcat(qstring_buf, yytext); 702 } else if (in_comment == FALSE) { 703 if ((in_gettext == TRUE) || 704 (in_dgettext == TRUE) || 705 (in_dcgettext == TRUE) || 706 (in_textdomain == TRUE)) { 707 in_str = TRUE; 708 num_nested_open_paren++; 709 } 710 } 711 } /* handle_open_paren */ 712 713 /* 714 * Handler for ')' in input file. 715 */ 716 void 717 handle_close_paren(void) 718 { 719 if (cflg == TRUE) 720 lstrcat(curr_line, yytext); 721 722 if (in_quote == TRUE) { 723 lstrcat(qstring_buf, yytext); 724 } else if (in_comment == FALSE) { 725 if ((in_gettext == TRUE) || 726 (in_dgettext == TRUE) || 727 (in_dcgettext == TRUE) || 728 (in_textdomain == TRUE)) { 729 /* 730 * If this is not the matching close paren with 731 * the first open paren, no action is necessary. 732 */ 733 if (--num_nested_open_paren > 0) 734 return; 735 add_str_to_element_list(in_textdomain, curr_domain); 736 in_str = FALSE; 737 in_gettext = FALSE; 738 in_dgettext = FALSE; 739 in_dcgettext = FALSE; 740 in_textdomain = FALSE; 741 } else if (aflg == TRUE) { 742 end_ansi_string(); 743 } 744 } 745 } /* handle_close_paren */ 746 747 /* 748 * Handler for '\\n' in input file. 749 * 750 * This is a '\' followed by new line. 751 * This can be treated like a new line except when this is a continuation 752 * of a ANSI-C string. 753 * If this is a part of ANSI string, treat the current line as a double 754 * quoted string and the next line is the start of the double quoted 755 * string. 
756 */ 757 void 758 handle_esc_newline(void) 759 { 760 if (cflg == TRUE) 761 lstrcat(curr_line, "\\"); 762 763 curr_linenum++; 764 765 if (in_quote == TRUE) { 766 add_qstring_to_str(); 767 } else if ((in_comment == TRUE) || 768 (is_last_comment_line == TRUE)) { 769 if (in_cplus_comment == FALSE) { 770 add_line_to_comment(); 771 } 772 } 773 774 curr_line[0] = '\0'; 775 } /* handle_esc_newline */ 776 777 /* 778 * Handler for '"' in input file. 779 */ 780 void 781 handle_quote(void) 782 { 783 if (cflg == TRUE) 784 lstrcat(curr_line, yytext); 785 786 if (in_comment == TRUE) { 787 /*EMPTY*/ 788 } else if ((in_gettext == TRUE) || 789 (in_dgettext == TRUE) || 790 (in_dcgettext == TRUE) || 791 (in_textdomain == TRUE)) { 792 if (in_str == TRUE) { 793 if (in_quote == FALSE) { 794 in_quote = TRUE; 795 } else { 796 add_qstring_to_str(); 797 in_quote = FALSE; 798 } 799 } 800 } else if (aflg == TRUE) { 801 /* 802 * The quote is found outside of gettext, dgetext, and 803 * textdomain. Everytime a quoted string is found, 804 * add it to the string list. 805 * in_str stays TRUE until ANSI string ends. 806 */ 807 if (in_str == TRUE) { 808 if (in_quote == TRUE) { 809 in_quote = FALSE; 810 add_qstring_to_str(); 811 } else { 812 in_quote = TRUE; 813 } 814 } else { 815 in_str = TRUE; 816 in_quote = TRUE; 817 linenum_saved = curr_linenum; 818 } 819 } else { 820 in_skippable_string = (in_skippable_string == TRUE) ? 821 FALSE : TRUE; 822 } 823 } /* handle_quote */ 824 825 /* 826 * Handler for ' ' or TAB in input file. 827 */ 828 void 829 handle_spaces(void) 830 { 831 if (cflg == TRUE) 832 lstrcat(curr_line, yytext); 833 834 if (in_quote == TRUE) { 835 lstrcat(qstring_buf, yytext); 836 } 837 } /* handle_spaces */ 838 839 /* 840 * Flattens a linked list containing ANSI string to the one string. 
841 */ 842 static void 843 copy_strlist_to_str(char *str, struct strlist_st *strlist) 844 { 845 struct strlist_st *p; 846 847 str[0] = '\0'; 848 849 if (strlist != NULL) { 850 p = strlist; 851 while (p != NULL) { 852 if (p->str != NULL) { 853 lstrcat(str, p->str); 854 } 855 p = p->next; 856 } 857 } 858 } /* copy_strlist_to_str */ 859 860 /* 861 * Handler for ',' in input file. 862 */ 863 void 864 handle_comma(void) 865 { 866 if (cflg == TRUE) 867 lstrcat(curr_line, yytext); 868 869 if (in_quote == TRUE) { 870 lstrcat(qstring_buf, yytext); 871 } else if (in_comment == FALSE) { 872 if (in_str == TRUE) { 873 if (in_dgettext == TRUE) { 874 copy_strlist_to_str(curr_domain, strhead); 875 free_strlist(strhead); 876 strhead = strtail = NULL; 877 } else if (in_dcgettext == TRUE) { 878 /* 879 * Ignore the second comma. 880 */ 881 if (is_first_comma_found == FALSE) { 882 copy_strlist_to_str(curr_domain, 883 strhead); 884 free_strlist(strhead); 885 strhead = strtail = NULL; 886 is_first_comma_found = TRUE; 887 } 888 } else if (aflg == TRUE) { 889 end_ansi_string(); 890 } 891 } 892 } 893 } /* handle_comma */ 894 895 /* 896 * Handler for any other character that does not have special handler. 897 */ 898 void 899 handle_character(void) 900 { 901 if (cflg == TRUE) 902 lstrcat(curr_line, yytext); 903 904 if (in_quote == TRUE) { 905 lstrcat(qstring_buf, yytext); 906 } else if (in_comment == FALSE) { 907 if (in_str == TRUE) { 908 if (aflg == TRUE) { 909 end_ansi_string(); 910 } 911 } 912 } 913 } /* handle_character */ 914 915 /* 916 * Handler for new line in input file. 917 */ 918 void 919 handle_newline(void) 920 { 921 curr_linenum++; 922 923 /* 924 * in_quote is always FALSE here for ANSI-C code. 
925 */ 926 if ((in_comment == TRUE) || 927 (is_last_comment_line == TRUE)) { 928 if (in_cplus_comment == TRUE) { 929 in_cplus_comment = FALSE; 930 in_comment = FALSE; 931 } else { 932 add_line_to_comment(); 933 } 934 } 935 936 curr_line[0] = '\0'; 937 /* 938 * C++ comment always ends with new line. 939 */ 940 } /* handle_newline */ 941 942 /* 943 * Process ANSI string. 944 */ 945 static void 946 end_ansi_string(void) 947 { 948 if ((aflg == TRUE) && 949 (in_str == TRUE) && 950 (in_gettext == FALSE) && 951 (in_dgettext == FALSE) && 952 (in_dcgettext == FALSE) && 953 (in_textdomain == FALSE)) { 954 add_str_to_element_list(FALSE, curr_domain); 955 in_str = FALSE; 956 } 957 } /* end_ansi_string */ 958 959 /* 960 * Initialize global variables if necessary. 961 */ 962 static void 963 initialize_globals(void) 964 { 965 default_domain = strdup(DEFAULT_DOMAIN); 966 curr_domain[0] = '\0'; 967 curr_file[0] = '\0'; 968 qstring_buf[0] = '\0'; 969 } /* initialize_globals() */ 970 971 /* 972 * Extract only string part when read a exclude file by removing 973 * keywords (e.g. msgid, msgstr, # ) and heading and trailing blanks and 974 * double quotes. 975 */ 976 static void 977 trim_line(char *line) 978 { 979 int i, p, len; 980 int first = 0; 981 int last = 0; 982 char c; 983 984 len = strlen(line); 985 986 /* 987 * Find the position of the last non-whitespace character. 988 */ 989 i = len - 1; 990 /*CONSTCOND*/ 991 while (1) { 992 c = line[i--]; 993 if ((c != ' ') && (c != '\n') && (c != '\t')) { 994 last = ++i; 995 break; 996 } 997 } 998 999 /* 1000 * Find the position of the first non-whitespace character 1001 * by skipping "msgid" initially. 
1002 */ 1003 if (strncmp("msgid ", line, 6) == 0) { 1004 i = 5; 1005 } else if (strncmp("msgstr ", line, 7) == 0) { 1006 i = 6; 1007 } else if (strncmp("# ", line, 2) == 0) { 1008 i = 2; 1009 } else { 1010 i = 0; 1011 } 1012 1013 /*CONSTCOND*/ 1014 while (1) { 1015 c = line[i++]; 1016 if ((c != ' ') && (c != '\n') && (c != '\t')) { 1017 first = --i; 1018 break; 1019 } 1020 } 1021 1022 /* 1023 * For Backward compatibility, we consider both double quoted 1024 * string and non-quoted string. 1025 * The double quote is removed before being stored if exists. 1026 */ 1027 if (line[first] == '"') { 1028 first++; 1029 } 1030 if (line[last] == '"') { 1031 last--; 1032 } 1033 1034 /* 1035 * Now copy the valid part of the string. 1036 */ 1037 p = first; 1038 for (i = 0; i <= (last-first); i++) { 1039 line[i] = line[p++]; 1040 } 1041 line [i] = '\0'; 1042 } /* trim_line */ 1043 1044 /* 1045 * Read exclude file and stores it in the global linked list. 1046 */ 1047 static void 1048 read_exclude_file(void) 1049 { 1050 FILE *fp; 1051 struct exclude_st *tmp_excl; 1052 struct strlist_st *tail; 1053 int ignore_line; 1054 char line [MAX_STRING_LEN]; 1055 1056 if ((fp = fopen(exclude_file, "r")) == NULL) { 1057 (void) fprintf(stderr, "ERROR, can't open exclude file: %s\n", 1058 exclude_file); 1059 exit(2); 1060 } 1061 1062 ignore_line = TRUE; 1063 while (fgets(line, MAX_STRING_LEN, fp) != NULL) { 1064 /* 1065 * Line starting with # is a comment line and ignored. 1066 * Blank line is ignored, too. 
1067 */ 1068 if ((line[0] == '\n') || (line[0] == '#')) { 1069 continue; 1070 } else if (strncmp(line, "msgstr", 6) == 0) { 1071 ignore_line = TRUE; 1072 } else if (strncmp(line, "domain", 6) == 0) { 1073 ignore_line = TRUE; 1074 } else if (strncmp(line, "msgid", 5) == 0) { 1075 ignore_line = FALSE; 1076 tmp_excl = new_exclude(); 1077 tmp_excl->exstr = new_strlist(); 1078 trim_line(line); 1079 tmp_excl->exstr->str = strdup(line); 1080 tail = tmp_excl->exstr; 1081 /* 1082 * Prepend new exclude string node to the list. 1083 */ 1084 tmp_excl->next = excl_head; 1085 excl_head = tmp_excl; 1086 } else { 1087 /* 1088 * If more than one line of string forms msgid, 1089 * append it to the string linked list. 1090 */ 1091 if (ignore_line == FALSE) { 1092 trim_line(line); 1093 tail->next = new_strlist(); 1094 tail->next->str = strdup(line); 1095 tail = tail->next; 1096 } 1097 } 1098 } /* while */ 1099 1100 #ifdef DEBUG 1101 tmp_excl = excl_head; 1102 while (tmp_excl != NULL) { 1103 printf("============================\n"); 1104 tail = tmp_excl->exstr; 1105 while (tail != NULL) { 1106 printf("%s###\n", tail->str); 1107 tail = tail->next; 1108 } 1109 tmp_excl = tmp_excl->next; 1110 } 1111 #endif 1112 } /* read_exclude_file */ 1113 1114 /* 1115 * Get next character from the string list containing ANSI style string. 1116 * This function returns three valus. (p, *m, *c). 1117 * p is returned by return value and, *m and *c are returned by changing 1118 * values in the location pointed. 1119 * 1120 * p : points node in the linked list for ANSI string. 1121 * Each node contains double quoted string. 1122 * m : The location of the next characters in the double quoted string 1123 * as integer index in the string. 1124 * When it gets to end of quoted string, the next node will be 1125 * read and m starts as zero for every new node. 1126 * c : Stores the value of the characterto be returned. 
1127 */ 1128 static struct strlist_st * 1129 get_next_ch(struct strlist_st *p, int *m, char *c) 1130 { 1131 char ch, oct, hex; 1132 int value, i; 1133 1134 /* 1135 * From the string list, find non-null string first. 1136 */ 1137 1138 /*CONSTCOND*/ 1139 while (1) { 1140 if (p == NULL) { 1141 break; 1142 } else if (p->str == NULL) { 1143 p = p->next; 1144 } else if (p->str[*m] == '\0') { 1145 p = p->next; 1146 *m = 0; 1147 } else { 1148 break; 1149 } 1150 } 1151 1152 /* 1153 * No more character is available. 1154 */ 1155 if (p == NULL) { 1156 *c = 0; 1157 return (NULL); 1158 } 1159 1160 /* 1161 * Check if the character back slash. 1162 * If yes, ANSI defined escape sequence rule is used. 1163 */ 1164 if (p->str[*m] != '\\') { 1165 *c = p->str[*m]; 1166 *m = *m + 1; 1167 return (p); 1168 } else { 1169 /* 1170 * Get next character after '\'. 1171 */ 1172 *m = *m + 1; 1173 ch = p->str[*m]; 1174 switch (ch) { 1175 case 'a': 1176 *c = '\a'; 1177 break; 1178 case 'b': 1179 *c = '\b'; 1180 break; 1181 case 'f': 1182 *c = '\f'; 1183 break; 1184 case 'n': 1185 *c = '\n'; 1186 break; 1187 case 'r': 1188 *c = '\r'; 1189 break; 1190 case 't': 1191 *c = '\t'; 1192 break; 1193 case 'v': 1194 *c = '\v'; 1195 break; 1196 case '0': 1197 case '1': 1198 case '2': 1199 case '3': 1200 case '4': 1201 case '5': 1202 case '6': 1203 case '7': 1204 /* 1205 * Get maximum of three octal digits. 1206 */ 1207 value = ch; 1208 for (i = 0; i < 2; i++) { 1209 *m = *m + 1; 1210 oct = p->str[*m]; 1211 if ((oct >= '0') && (oct <= '7')) { 1212 value = value * 8 + (oct - '0'); 1213 } else { 1214 *m = *m - 1; 1215 break; 1216 } 1217 } 1218 *c = value; 1219 #ifdef DEBUG 1220 /* (void) fprintf(stderr, "octal=%d\n", value); */ 1221 #endif 1222 break; 1223 case 'x': 1224 value = 0; 1225 /* 1226 * Remove all heading zeros first and 1227 * get one or two valuid hexadecimal charaters. 
1228 */ 1229 *m = *m + 1; 1230 while (p->str[*m] == '0') { 1231 *m = *m + 1; 1232 } 1233 value = 0; 1234 for (i = 0; i < 2; i++) { 1235 hex = p->str[*m]; 1236 *m = *m + 1; 1237 if (isdigit(hex)) { 1238 value = value * 16 + (hex - '0'); 1239 } else if (isxdigit(hex)) { 1240 hex = tolower(hex); 1241 value = value * 16 + (hex - 'a' + 10); 1242 } else { 1243 *m = *m - 1; 1244 break; 1245 } 1246 } 1247 *c = value; 1248 #ifdef DEBUG 1249 (void) fprintf(stderr, "hex=%d\n", value); 1250 #endif 1251 *m = *m - 1; 1252 break; 1253 default : 1254 /* 1255 * Undefined by ANSI. 1256 * Just ignore "\". 1257 */ 1258 *c = p->str[*m]; 1259 break; 1260 } 1261 /* 1262 * Advance pointer to point the next character to be parsed. 1263 */ 1264 *m = *m + 1; 1265 return (p); 1266 } 1267 } /* get_next_ch */ 1268 1269 /* 1270 * Compares two msgids. 1271 * Comparison is done by values, not by characters represented. 1272 * For example, '\t', '\011' and '0x9' are identical values. 1273 * Return values are same as in strcmp. 1274 * 1 if msgid1 > msgid2 1275 * 0 if msgid1 = msgid2 1276 * -1 if msgid1 < msgid2 1277 */ 1278 static int 1279 msgidcmp(struct strlist_st *id1, struct strlist_st *id2) 1280 { 1281 char c1, c2; 1282 int m1, m2; 1283 1284 m1 = 0; 1285 m2 = 0; 1286 1287 /*CONSTCOND*/ 1288 while (1) { 1289 id1 = get_next_ch(id1, &m1, &c1); 1290 id2 = get_next_ch(id2, &m2, &c2); 1291 1292 if ((c1 == 0) && (c2 == 0)) { 1293 return (0); 1294 } 1295 1296 if (c1 > c2) { 1297 return (1); 1298 } else if (c1 < c2) { 1299 return (-1); 1300 } 1301 } 1302 /*NOTREACHED*/ 1303 } /* msgidcmp */ 1304 1305 /* 1306 * Check if a ANSI string (which is a linked list itself) is a duplicate 1307 * of any string in the list of ANSI string. 
1308 */ 1309 static int 1310 isduplicate(struct element_st *list, struct strlist_st *str) 1311 { 1312 struct element_st *p; 1313 1314 if (list == NULL) { 1315 return (FALSE); 1316 } 1317 1318 p = list; 1319 while (p != NULL) { 1320 if (p->msgid != NULL) { 1321 if (msgidcmp(p->msgid, str) == 0) { 1322 return (TRUE); 1323 } 1324 } 1325 p = p->next; 1326 } 1327 1328 return (FALSE); 1329 } /* isduplicate */ 1330 1331 /* 1332 * Extract a comment line and add to the linked list containing 1333 * comment block. 1334 * Each comment line is stored in the node. 1335 */ 1336 static void 1337 add_line_to_comment(void) 1338 { 1339 struct strlist_st *tmp_str; 1340 1341 tmp_str = new_strlist(); 1342 tmp_str->str = strdup(curr_line); 1343 tmp_str->next = NULL; 1344 1345 if (commhead == NULL) { 1346 /* Empty comment list */ 1347 commhead = tmp_str; 1348 commtail = tmp_str; 1349 } else { 1350 /* append it to the list */ 1351 commtail->next = tmp_str; 1352 commtail = commtail->next; 1353 } 1354 1355 is_last_comment_line = FALSE; 1356 } /* add_line_to_comment */ 1357 1358 /* 1359 * Add a double quoted string to the linked list containing ANSI string. 1360 */ 1361 static void 1362 add_qstring_to_str(void) 1363 { 1364 struct strlist_st *tmp_str; 1365 1366 tmp_str = new_strlist(); 1367 tmp_str->str = strdup(qstring_buf); 1368 tmp_str->next = NULL; 1369 1370 if (strhead == NULL) { 1371 /* Null ANSI string */ 1372 strhead = tmp_str; 1373 strtail = tmp_str; 1374 } else { 1375 /* Append it to the ANSI string linked list */ 1376 strtail->next = tmp_str; 1377 strtail = strtail->next; 1378 } 1379 1380 qstring_buf[0] = '\0'; 1381 } /* add_qstring_to_str */ 1382 1383 /* 1384 * Finds the head of domain nodes given domain name. 1385 */ 1386 static struct domain_st * 1387 find_domain_node(char *dname) 1388 { 1389 struct domain_st *tmp_dom, *p; 1390 1391 /* 1392 * If -a option is specified everything will be written to the 1393 * default domain file. 
1394 */ 1395 if (aflg == TRUE) { 1396 if (def_dom == NULL) { 1397 def_dom = new_domain(); 1398 } 1399 return (def_dom); 1400 } 1401 1402 if ((dname == NULL) || 1403 (dname[0] == '\0') || 1404 (strcmp(dname, default_domain) == 0)) { 1405 if (def_dom == NULL) { 1406 def_dom = new_domain(); 1407 } 1408 if (strcmp(dname, default_domain) == 0) { 1409 (void) fprintf(stderr, "%s \"%s\" is used in dgettext " 1410 "of file:%s line:%d.\n", 1411 "Warning: default domain name", 1412 default_domain, curr_file, curr_linenum); 1413 } 1414 return (def_dom); 1415 } else { 1416 p = dom_head; 1417 while (p != NULL) { 1418 if (strcmp(p->dname, dname) == 0) { 1419 return (p); 1420 } 1421 p = p->next; 1422 } 1423 1424 tmp_dom = new_domain(); 1425 tmp_dom->dname = strdup(dname); 1426 1427 if (dom_head == NULL) { 1428 dom_head = tmp_dom; 1429 dom_tail = tmp_dom; 1430 } else { 1431 dom_tail->next = tmp_dom; 1432 dom_tail = dom_tail->next; 1433 } 1434 return (tmp_dom); 1435 } 1436 } /* find_domain_node */ 1437 1438 /* 1439 * Frees the ANSI string linked list. 1440 */ 1441 static void 1442 free_strlist(struct strlist_st *ptr) 1443 { 1444 struct strlist_st *p; 1445 1446 p = ptr; 1447 ptr = NULL; 1448 while (p != NULL) { 1449 ptr = p->next; 1450 free(p->str); 1451 free(p); 1452 p = ptr; 1453 } 1454 } /* free_strlist */ 1455 1456 /* 1457 * Finds if a ANSI string is contained in the exclude file. 1458 */ 1459 static int 1460 isexcluded(struct strlist_st *strlist) 1461 { 1462 struct exclude_st *p; 1463 1464 p = excl_head; 1465 while (p != NULL) { 1466 if (msgidcmp(p->exstr, strlist) == 0) { 1467 return (TRUE); 1468 } 1469 p = p->next; 1470 } 1471 return (FALSE); 1472 } /* isexcluded */ 1473 1474 /* 1475 * Finds if a comment block is to be extracted. 1476 * 1477 * When -c option is specified, find out if comment block contains 1478 * comment-tag as a token separated by blanks. If it does, this 1479 * comment block is associated with the next msgid encountered. 
1480 * Comment block is a linked list where each node contains one line 1481 * of comments. 1482 */ 1483 static int 1484 isextracted(struct strlist_st *strlist) 1485 { 1486 struct strlist_st *p; 1487 char *first, *pc; 1488 1489 1490 p = strlist; 1491 while (p != NULL) { 1492 first = strdup(p->str); 1493 while ((first != NULL) && (first[0] != '\0')) { 1494 pc = first; 1495 1496 /*CONSTCOND*/ 1497 while (1) { 1498 if (*pc == '\0') { 1499 break; 1500 } else if ((*pc == ' ') || (*pc == '\t')) { 1501 *pc++ = '\0'; 1502 break; 1503 } 1504 pc++; 1505 } 1506 if (strcmp(first, comment_tag) == 0) { 1507 return (TRUE); 1508 } 1509 first = pc; 1510 } 1511 p = p->next; 1512 } /* while */ 1513 1514 /* 1515 * Not found. 1516 */ 1517 return (FALSE); 1518 } /* isextracted */ 1519 1520 /* 1521 * Adds ANSI string to the domain element list. 1522 */ 1523 static void 1524 add_str_to_element_list(int istextdomain, char *domain_list) 1525 { 1526 struct element_st *tmp_elem; 1527 struct element_st *p, *q; 1528 struct domain_st *tmp_dom; 1529 int result; 1530 1531 /* 1532 * This can happen if something like gettext(USAGE) is used 1533 * and it is impossible to get msgid for this gettext. 1534 * Since -x option should be used in this kind of cases, 1535 * it is OK not to catch msgid. 1536 */ 1537 if (strhead == NULL) { 1538 return; 1539 } 1540 1541 /* 1542 * The global variable curr_domain contains either NULL 1543 * for default_domain or domain name for dgettext(). 1544 */ 1545 tmp_dom = find_domain_node(domain_list); 1546 1547 /* 1548 * If this msgid is in the exclude file, 1549 * then free the linked list and return. 
1550 */ 1551 if ((istextdomain == FALSE) && 1552 (isexcluded(strhead) == TRUE)) { 1553 free_strlist(strhead); 1554 strhead = strtail = NULL; 1555 return; 1556 } 1557 1558 tmp_elem = new_element(); 1559 tmp_elem->msgid = strhead; 1560 tmp_elem->istextdomain = istextdomain; 1561 /* 1562 * If -c option is specified and TAG matches, 1563 * then associate the comment to the next [d]gettext() calls 1564 * encountered in the source code. 1565 * textdomain() calls will not have any effect. 1566 */ 1567 if (istextdomain == FALSE) { 1568 if ((cflg == TRUE) && (commhead != NULL)) { 1569 if (isextracted(commhead) == TRUE) { 1570 tmp_elem->comment = commhead; 1571 } else { 1572 free_strlist(commhead); 1573 } 1574 commhead = commtail = NULL; 1575 } 1576 } 1577 1578 tmp_elem->linenum = linenum_saved; 1579 tmp_elem->fname = strdup(curr_file); 1580 1581 1582 if (sflg == TRUE) { 1583 /* 1584 * If this is textdomain() call and -s option is specified, 1585 * append this node to the textdomain linked list. 1586 */ 1587 if (istextdomain == TRUE) { 1588 if (tmp_dom->textdomain_head == NULL) { 1589 tmp_dom->textdomain_head = tmp_elem; 1590 tmp_dom->textdomain_tail = tmp_elem; 1591 } else { 1592 tmp_dom->textdomain_tail->next = tmp_elem; 1593 tmp_dom->textdomain_tail = tmp_elem; 1594 } 1595 strhead = strtail = NULL; 1596 return; 1597 } 1598 1599 /* 1600 * Insert the node to the properly sorted position. 1601 */ 1602 q = NULL; 1603 p = tmp_dom->gettext_head; 1604 while (p != NULL) { 1605 result = msgidcmp(strhead, p->msgid); 1606 if (result == 0) { 1607 /* 1608 * Duplicate id. Do not store. 
1609 */ 1610 free_strlist(strhead); 1611 strhead = strtail = NULL; 1612 return; 1613 } else if (result > 0) { 1614 /* move to the next node */ 1615 q = p; 1616 p = p->next; 1617 } else { 1618 tmp_elem->next = p; 1619 if (q != NULL) { 1620 q->next = tmp_elem; 1621 } else { 1622 tmp_dom->gettext_head = tmp_elem; 1623 } 1624 strhead = strtail = NULL; 1625 return; 1626 } 1627 } /* while */ 1628 1629 /* 1630 * New msgid is the largest or empty list. 1631 */ 1632 if (q != NULL) { 1633 /* largest case */ 1634 q->next = tmp_elem; 1635 } else { 1636 /* empty list */ 1637 tmp_dom->gettext_head = tmp_elem; 1638 } 1639 } else { 1640 /* 1641 * Check if this msgid is already in the same domain. 1642 */ 1643 if (tmp_dom != NULL) { 1644 if (isduplicate(tmp_dom->gettext_head, 1645 tmp_elem->msgid) == TRUE) { 1646 tmp_elem->isduplicate = TRUE; 1647 } 1648 } 1649 /* 1650 * If -s option is not specified, then everything 1651 * is stored in gettext linked list. 1652 */ 1653 if (tmp_dom->gettext_head == NULL) { 1654 tmp_dom->gettext_head = tmp_elem; 1655 tmp_dom->gettext_tail = tmp_elem; 1656 } else { 1657 tmp_dom->gettext_tail->next = tmp_elem; 1658 tmp_dom->gettext_tail = tmp_elem; 1659 } 1660 } 1661 1662 strhead = strtail = NULL; 1663 } /* add_str_to_element_list */ 1664 1665 /* 1666 * Write all domain linked list to the files. 1667 */ 1668 static void 1669 write_all_files(void) 1670 { 1671 struct domain_st *tmp; 1672 1673 /* 1674 * Write out default domain file. 1675 */ 1676 write_one_file(def_dom); 1677 1678 /* 1679 * If dgettext() exists and -a option is not used, 1680 * then there are non-empty linked list. 1681 */ 1682 tmp = dom_head; 1683 while (tmp != NULL) { 1684 write_one_file(tmp); 1685 tmp = tmp->next; 1686 } 1687 } /* write_all_files */ 1688 1689 /* 1690 * add an element_st list to the linked list. 
1691 */ 1692 static void 1693 add_node_to_polist(struct element_st **pohead, 1694 struct element_st **potail, struct element_st *elem) 1695 { 1696 if (elem == NULL) { 1697 return; 1698 } 1699 1700 if (*pohead == NULL) { 1701 *pohead = *potail = elem; 1702 } else { 1703 (*potail)->next = elem; 1704 *potail = (*potail)->next; 1705 } 1706 } /* add_node_to_polist */ 1707 1708 #define INIT_STATE 0 1709 #define IN_MSGID 1 1710 #define IN_MSGSTR 2 1711 #define IN_COMMENT 3 1712 /* 1713 * Reads existing po file into the linked list and returns the head 1714 * of the linked list. 1715 */ 1716 static struct element_st * 1717 read_po(char *fname) 1718 { 1719 struct element_st *tmp_elem = NULL; 1720 struct element_st *ehead = NULL, *etail = NULL; 1721 struct strlist_st *comment_tail = NULL; 1722 struct strlist_st *msgid_tail = NULL; 1723 struct strlist_st *msgstr_tail = NULL; 1724 int state = INIT_STATE; 1725 char line [MAX_STRING_LEN]; 1726 FILE *fp; 1727 1728 if ((fp = fopen(fname, "r")) == NULL) { 1729 return (NULL); 1730 } 1731 1732 while (fgets(line, MAX_STRING_LEN, fp) != NULL) { 1733 /* 1734 * Line starting with # is a comment line and ignored. 1735 * Blank line is ignored, too. 1736 */ 1737 if (line[0] == '\n') { 1738 continue; 1739 } else if (line[0] == '#') { 1740 /* 1741 * If tmp_elem is not NULL, there is msgid pair 1742 * stored. Therefore, add it. 1743 */ 1744 if ((tmp_elem != NULL) && (state == IN_MSGSTR)) { 1745 add_node_to_polist(&ehead, &etail, tmp_elem); 1746 } 1747 1748 if ((state == INIT_STATE) || (state == IN_MSGSTR)) { 1749 state = IN_COMMENT; 1750 tmp_elem = new_element(); 1751 tmp_elem->comment = comment_tail = 1752 new_strlist(); 1753 /* 1754 * remove new line and skip "# " 1755 * in the beginning of the existing 1756 * comment line. 
1757 */ 1758 line[strlen(line)-1] = 0; 1759 comment_tail->str = strdup(line+2); 1760 } else if (state == IN_COMMENT) { 1761 comment_tail->next = new_strlist(); 1762 comment_tail = comment_tail->next; 1763 /* 1764 * remove new line and skip "# " 1765 * in the beginning of the existing 1766 * comment line. 1767 */ 1768 line[strlen(line)-1] = 0; 1769 comment_tail->str = strdup(line+2); 1770 } 1771 1772 } else if (strncmp(line, "domain", 6) == 0) { 1773 /* ignore domain line */ 1774 continue; 1775 } else if (strncmp(line, "msgid", 5) == 0) { 1776 if (state == IN_MSGSTR) { 1777 add_node_to_polist(&ehead, &etail, tmp_elem); 1778 tmp_elem = new_element(); 1779 } else if (state == INIT_STATE) { 1780 tmp_elem = new_element(); 1781 } 1782 1783 state = IN_MSGID; 1784 trim_line(line); 1785 tmp_elem->msgid = msgid_tail = new_strlist(); 1786 msgid_tail->str = strdup(line); 1787 1788 } else if (strncmp(line, "msgstr", 6) == 0) { 1789 state = IN_MSGSTR; 1790 trim_line(line); 1791 tmp_elem->msgstr = msgstr_tail = new_strlist(); 1792 msgstr_tail->str = strdup(line); 1793 } else { 1794 /* 1795 * If more than one line of string forms msgid, 1796 * append it to the string linked list. 1797 */ 1798 if (state == IN_MSGID) { 1799 trim_line(line); 1800 msgid_tail->next = new_strlist(); 1801 msgid_tail = msgid_tail->next; 1802 msgid_tail->str = strdup(line); 1803 } else if (state == IN_MSGSTR) { 1804 trim_line(line); 1805 msgstr_tail->next = new_strlist(); 1806 msgstr_tail = msgstr_tail->next; 1807 msgstr_tail->str = strdup(line); 1808 } 1809 } 1810 } /* while */ 1811 1812 /* 1813 * To insert the last msgid pair. 
1814 */ 1815 if (tmp_elem != NULL) { 1816 add_node_to_polist(&ehead, &etail, tmp_elem); 1817 } 1818 1819 #ifdef DEBUG 1820 { 1821 struct domain_st *tmp_domain = new_domain(); 1822 char tmpstr[256]; 1823 1824 sprintf(tmpstr, "existing_po file : <%s>", fname); 1825 tmp_domain->dname = strdup(tmpstr); 1826 tmp_domain->gettext_head = ehead; 1827 printf("======= existing po file <%s> ========\n", fname); 1828 print_one_domain(tmp_domain); 1829 } 1830 #endif /* DEBUG */ 1831 1832 (void) fclose(fp); 1833 return (ehead); 1834 } /* read_po */ 1835 1836 /* 1837 * This function will append the second list to the first list. 1838 * If the msgid in the second list contains msgid in the first list, 1839 * it will be marked as duplicate. 1840 */ 1841 static struct element_st * 1842 append_list(struct element_st *l1, struct element_st *l2) 1843 { 1844 struct element_st *p = NULL, *q = NULL, *l1_tail = NULL; 1845 1846 if (l1 == NULL) 1847 return (l2); 1848 if (l2 == NULL) 1849 return (l1); 1850 1851 /* 1852 * in this while loop, just mark isduplicate field of node in the 1853 * l2 list if the same msgid exists in l1 list. 1854 */ 1855 p = l2; 1856 while (p != NULL) { 1857 q = l1; 1858 while (q != NULL) { 1859 if (msgidcmp(p->msgid, q->msgid) == 0) { 1860 p->isduplicate = TRUE; 1861 break; 1862 } 1863 q = q->next; 1864 } 1865 p = p->next; 1866 } 1867 1868 /* Now connect two linked lists. */ 1869 l1_tail = l1; 1870 while (l1_tail->next != NULL) { 1871 if (l1->next == NULL) 1872 break; 1873 l1_tail = l1_tail-> next; 1874 } 1875 l1_tail->next = l2; 1876 1877 return (l1); 1878 } /* append_list */ 1879 1880 /* 1881 * Writes one domain list to the file. 
1882 */ 1883 static void 1884 write_one_file(struct domain_st *head) 1885 { 1886 FILE *fp; 1887 char fname [MAX_PATH_LEN]; 1888 char dname [MAX_DOMAIN_LEN]; 1889 struct element_st *p; 1890 struct element_st *existing_po_list; 1891 1892 /* 1893 * If head is NULL, then it still has to create .po file 1894 * so that it will guarantee that the previous .po file was 1895 * alwasys deleted. 1896 * This is why checking NULL pointer has been moved to after 1897 * creating .po file. 1898 */ 1899 1900 /* 1901 * If domain name is NULL, it is the default domain list. 1902 * The domain name is either "messages" or specified by option -d. 1903 * The default domain name is contained in default_domain variable. 1904 */ 1905 dname[0] = '\0'; 1906 if ((head != NULL) && 1907 (head->dname != NULL)) { 1908 (void) strcpy(dname, head->dname); 1909 } else { 1910 (void) strcpy(dname, default_domain); 1911 } 1912 1913 /* 1914 * path is the current directory if not specified by option -p. 1915 */ 1916 fname[0] = 0; 1917 if (pflg == TRUE) { 1918 (void) strcat(fname, pathname); 1919 (void) strcat(fname, "/"); 1920 } 1921 (void) strcat(fname, dname); 1922 (void) strcat(fname, ".po"); 1923 1924 /* 1925 * If -j flag is specified, read exsiting .po file and 1926 * append the current list to the end of the list read from 1927 * the existing .po file. 1928 */ 1929 if (jflg == TRUE) { 1930 /* 1931 * If head is NULL, we don't have to change existing file. 1932 * Therefore, just return it. 
1933 */ 1934 if (head == NULL) { 1935 return; 1936 } 1937 existing_po_list = read_po(fname); 1938 head->gettext_head = append_list(existing_po_list, 1939 head->gettext_head); 1940 #ifdef DEBUG 1941 if (head->dname != NULL) { 1942 printf("===after merge (-j option): <%s>===\n", 1943 head->dname); 1944 } else { 1945 printf("===after merge (-j option): <NULL>===\n"); 1946 } 1947 print_one_domain(head); 1948 #endif 1949 1950 } /* if jflg */ 1951 1952 if ((fp = fopen(fname, "w")) == NULL) { 1953 (void) fprintf(stderr, 1954 "ERROR, can't open output file: %s\n", fname); 1955 exit(2); 1956 } 1957 1958 (void) fprintf(fp, "domain \"%s\"\n", dname); 1959 1960 /* See comments above in the beginning of this function */ 1961 if (head == NULL) 1962 return; 1963 1964 /* 1965 * There are separate storage for textdomain() calls if 1966 * -s option is used (textdomain_head linked list). 1967 * Otherwise, textdomain() is mixed with gettext(0 and dgettext(). 1968 * If mixed, the boolean varaible istextdomain is used to see 1969 * if the current node contains textdomain() or [d]gettext(). 1970 */ 1971 if (sflg == TRUE) { 1972 p = head->textdomain_head; 1973 while (p != NULL) { 1974 /* 1975 * textdomain output line already contains 1976 * FIle name and line number information. 1977 * Therefore, does not have to check for nflg. 1978 */ 1979 output_textdomain(fp, p); 1980 p = p->next; 1981 } 1982 } 1983 1984 p = head->gettext_head; 1985 while (p != NULL) { 1986 1987 /* 1988 * Comment is printed only if -c is used and 1989 * associated with gettext or dgettext. 1990 * textdomain is not associated with comments. 1991 * Changes: 1992 * comments should be extracted in case of -j option 1993 * because there are read from exising file. 1994 */ 1995 if (((cflg == TRUE) || (jflg == TRUE)) && 1996 (p->istextdomain != TRUE)) { 1997 output_comment(fp, p->comment); 1998 } 1999 2000 /* 2001 * If -n is used, then file number and line number 2002 * information is printed. 
2003 * In case of textdomain(), this information is redundant 2004 * and is not printed. 2005 * If linenum is 0, it means this information has been 2006 * read from existing po file and it already contains 2007 * file and line number info as a comment line. So, it 2008 * should not printed in such case. 2009 */ 2010 if ((nflg == TRUE) && (p->istextdomain == FALSE) && 2011 (p->linenum > 0)) { 2012 (void) fprintf(fp, "# File:%s, line:%d\n", 2013 p->fname, p->linenum); 2014 } 2015 2016 /* 2017 * Depending on the type of node, output textdomain comment 2018 * or msgid. 2019 */ 2020 if ((sflg == FALSE) && 2021 (p->istextdomain == TRUE)) { 2022 output_textdomain(fp, p); 2023 } else { 2024 output_msgid(fp, p->msgid, p->isduplicate); 2025 } 2026 p = p->next; 2027 2028 } /* while */ 2029 2030 (void) fclose(fp); 2031 } /* write_one_file */ 2032 2033 /* 2034 * Prints out textdomain call as a comment line with file name and 2035 * the line number information. 2036 */ 2037 static void 2038 output_textdomain(FILE *fp, struct element_st *p) 2039 { 2040 2041 if (p == NULL) 2042 return; 2043 2044 /* 2045 * Write textdomain() line as a comment. 2046 */ 2047 (void) fprintf(fp, "# File:%s, line:%d, textdomain(\"%s\");\n", 2048 p->fname, p->linenum, p->msgid->str); 2049 } /* output_textdomain */ 2050 2051 /* 2052 * Prints out comments from linked list. 2053 */ 2054 static void 2055 output_comment(FILE *fp, struct strlist_st *p) 2056 { 2057 if (p == NULL) 2058 return; 2059 2060 /* 2061 * Write comment section. 2062 */ 2063 while (p != NULL) { 2064 (void) fprintf(fp, "# %s\n", p->str); 2065 p = p->next; 2066 } 2067 } /* output_comment */ 2068 2069 /* 2070 * Prints out msgid along with msgstr. 2071 */ 2072 static void 2073 output_msgid(FILE *fp, struct strlist_st *p, int duplicate) 2074 { 2075 struct strlist_st *q; 2076 2077 if (p == NULL) 2078 return; 2079 2080 /* 2081 * Write msgid section. 
2082 * If duplciate flag is ON, prepend "# " in front of every line 2083 * so that they are considered as comment lines in .po file. 2084 */ 2085 if (duplicate == TRUE) { 2086 (void) fprintf(fp, "# "); 2087 } 2088 (void) fprintf(fp, "msgid \"%s\"\n", p->str); 2089 q = p->next; 2090 while (q != NULL) { 2091 if (duplicate == TRUE) { 2092 (void) fprintf(fp, "# "); 2093 } 2094 (void) fprintf(fp, " \"%s\"\n", q->str); 2095 q = q->next; 2096 } 2097 2098 /* 2099 * Write msgstr section. 2100 * if -M option is specified, append <suffix> to msgid. 2101 * if -m option is specified, prepend <prefix> to msgid. 2102 */ 2103 if (duplicate == TRUE) { 2104 (void) fprintf(fp, "# "); 2105 } 2106 if ((mflg == TRUE) || (Mflg == TRUE)) { 2107 if (mflg == TRUE) { 2108 /* 2109 * If single line msgid, add suffix to the same line 2110 */ 2111 if ((Mflg == TRUE) && (p->next == NULL)) { 2112 /* -M and -m and single line case */ 2113 (void) fprintf(fp, "msgstr \"%s%s%s\"\n", 2114 prefix, p->str, suffix); 2115 } else { 2116 /* -M and -m and multi line case */ 2117 (void) fprintf(fp, "msgstr \"%s%s\"\n", 2118 prefix, p->str); 2119 } 2120 } else { 2121 if ((Mflg == TRUE) && (p->next == NULL)) { 2122 /* -M only with single line case */ 2123 (void) fprintf(fp, "msgstr \"%s%s\"\n", 2124 p->str, suffix); 2125 } else { 2126 /* -M only with multi line case */ 2127 (void) fprintf(fp, "msgstr \"%s\"\n", p->str); 2128 } 2129 } 2130 q = p->next; 2131 while (q != NULL) { 2132 if (duplicate == TRUE) { 2133 (void) fprintf(fp, "# "); 2134 } 2135 (void) fprintf(fp, " \"%s\"\n", q->str); 2136 q = q->next; 2137 } 2138 /* 2139 * If multi line msgid, add suffix after the last line. 2140 */ 2141 if ((Mflg == TRUE) && (p->next != NULL) && 2142 (suffix[0] != '\0')) { 2143 (void) fprintf(fp, " \"%s\"\n", suffix); 2144 } 2145 } else { 2146 (void) fprintf(fp, "msgstr\n"); 2147 } 2148 } /* output_msgid */ 2149 2150 /* 2151 * Malloc a new element node and initialize fields. 
2152 */ 2153 static struct element_st * 2154 new_element(void) 2155 { 2156 struct element_st *tmp; 2157 2158 tmp = (struct element_st *)malloc(sizeof (struct element_st)); 2159 tmp->istextdomain = FALSE; 2160 tmp->isduplicate = FALSE; 2161 tmp->msgid = NULL; 2162 tmp->msgstr = NULL; 2163 tmp->comment = NULL; 2164 tmp->fname = NULL; 2165 tmp->linenum = 0; 2166 tmp->next = NULL; 2167 2168 return (tmp); 2169 } /* new_element */ 2170 2171 /* 2172 * Malloc a new domain node and initialize fields. 2173 */ 2174 static struct domain_st * 2175 new_domain(void) 2176 { 2177 struct domain_st *tmp; 2178 2179 tmp = (struct domain_st *)malloc(sizeof (struct domain_st)); 2180 tmp->dname = NULL; 2181 tmp->gettext_head = NULL; 2182 tmp->gettext_tail = NULL; 2183 tmp->textdomain_head = NULL; 2184 tmp->textdomain_tail = NULL; 2185 tmp->next = NULL; 2186 2187 return (tmp); 2188 } /* new_domain */ 2189 2190 /* 2191 * Malloc a new string list node and initialize fields. 2192 */ 2193 static struct strlist_st * 2194 new_strlist(void) 2195 { 2196 struct strlist_st *tmp; 2197 2198 tmp = (struct strlist_st *)malloc(sizeof (struct strlist_st)); 2199 tmp->str = NULL; 2200 tmp->next = NULL; 2201 2202 return (tmp); 2203 } /* new_strlist */ 2204 2205 /* 2206 * Malloc a new exclude string list node and initialize fields. 2207 */ 2208 static struct exclude_st * 2209 new_exclude(void) 2210 { 2211 struct exclude_st *tmp; 2212 2213 tmp = (struct exclude_st *)malloc(sizeof (struct exclude_st)); 2214 tmp->exstr = NULL; 2215 tmp->next = NULL; 2216 2217 return (tmp); 2218 } /* new_exclude */ 2219 2220 /* 2221 * Local version of strcat to keep within maximum string size. 
2222 */ 2223 static void 2224 lstrcat(char *s1, const char *s2) 2225 { 2226 char *es1 = &s1[MAX_STRING_LEN]; 2227 char *ss1 = s1; 2228 2229 while (*s1++) 2230 ; 2231 --s1; 2232 while (*s1++ = *s2++) 2233 if (s1 >= es1) { 2234 s1[-1] = '\0'; 2235 if ((in_comment == TRUE || in_quote == TRUE) && 2236 (warn_linenum != curr_linenum)) { 2237 if (stdin_only == FALSE) { 2238 (void) fprintf(stderr, 2239 "WARNING: file %s line %d exceeds "\ 2240 "%d characters: \"%15.15s\"\n", 2241 curr_file, curr_linenum, 2242 MAX_STRING_LEN, ss1); 2243 } else { 2244 (void) fprintf(stderr, 2245 "WARNING: line %d exceeds "\ 2246 "%d characters: \"%15.15s\"\n", 2247 curr_linenum, MAX_STRING_LEN, ss1); 2248 } 2249 warn_linenum = curr_linenum; 2250 } 2251 break; 2252 } 2253 } /* lstrcat */ 2254 2255 #ifdef DEBUG 2256 /* 2257 * Debug print routine. Compiled only with DEBUG on. 2258 */ 2259 void 2260 print_element_list(struct element_st *q) 2261 { 2262 struct strlist_st *r; 2263 2264 while (q != NULL) { 2265 printf(" istextdomain = %d\n", q->istextdomain); 2266 printf(" isduplicate = %d\n", q->isduplicate); 2267 if ((q->msgid != NULL) && (q->msgid->str != NULL)) { 2268 printf(" msgid = <%s>\n", q->msgid->str); 2269 r = q->msgid->next; 2270 while (r != NULL) { 2271 printf(" <%s>\n", r->str); 2272 r = r->next; 2273 } 2274 } else { 2275 printf(" msgid = <NULL>\n"); 2276 } 2277 if ((q->msgstr != NULL) && (q->msgstr->str != NULL)) { 2278 printf(" msgstr= <%s>\n", q->msgstr->str); 2279 r = q->msgstr->next; 2280 while (r != NULL) { 2281 printf(" <%s>\n", r->str); 2282 r = r->next; 2283 } 2284 } else { 2285 printf(" msgstr= <NULL>\n"); 2286 } 2287 2288 if (q->comment == NULL) { 2289 printf(" comment = <NULL>\n"); 2290 } else { 2291 printf(" comment = <%s>\n", q->comment->str); 2292 r = q->comment->next; 2293 while (r != NULL) { 2294 printf(" <%s>\n", r->str); 2295 r = r->next; 2296 } 2297 } 2298 2299 if (q->fname == NULL) { 2300 printf(" fname = <NULL>\n"); 2301 } else { 2302 printf(" fname = <%s>\n", 
q->fname); 2303 } 2304 printf(" linenum = %d\n", q->linenum); 2305 printf("\n"); 2306 q = q->next; 2307 } 2308 } 2309 2310 /* 2311 * Debug print routine. Compiled only with DEBUG on. 2312 */ 2313 void 2314 print_one_domain(struct domain_st *p) 2315 { 2316 struct element_st *q; 2317 2318 if (p == NULL) { 2319 printf("domain pointer = <NULL>\n"); 2320 return; 2321 } else if (p->dname == NULL) { 2322 printf("domain_name = <%s>\n", "<NULL>"); 2323 } else { 2324 printf("domain_name = <%s>\n", p->dname); 2325 } 2326 q = p->gettext_head; 2327 print_element_list(q); 2328 2329 q = p->textdomain_head; 2330 print_element_list(q); 2331 } /* print_one_domain */ 2332 2333 void 2334 print_all_domain(struct domain_st *dom_list) 2335 { 2336 struct domain_st *p; 2337 struct element_st *q; 2338 2339 p = dom_list; 2340 while (p != NULL) { 2341 print_one_domain(p); 2342 p = p->next; 2343 } /* while */ 2344 } /* print_all_domain */ 2345 #endif 2346