%{
/*
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * POSIX localedef grammar.
 */

#include <limits.h>
#include <stdio.h>
#include <wchar.h>
#include "localedef.h"

%}

/* Semantic value types; each member is bound to tokens via %token <member>. */
%union {
	int		num;		/* value carried by T_NUMBER */
	wchar_t		wc;		/* value carried by T_CHAR */
	char		*token;		/* value carried by T_NAME and T_SYMBOL */
	collsym_t	*collsym;	/* value carried by T_COLLSYM */
	collelem_t	*collelem;	/* value carried by T_COLLELEM */
}

/*
 * Untyped tokens.  They are grouped by the part of the grammar that uses
 * them; the declaration order is significant for token numbering and must
 * not be changed.
 */

/* Keywords accepted by the `setting` rule. */
%token	T_CODE_SET T_MB_CUR_MAX T_MB_CUR_MIN T_COM_CHAR T_ESC_CHAR

/* Punctuation and other lexical tokens. */
%token	T_LT T_GT T_NL T_SEMI T_COMMA T_ELLIPSIS T_RPAREN T_LPAREN
	T_QUOTE T_NULL T_WS

/* Section terminator and the copy directive. */
%token	T_END T_COPY

/* Charmap and width sections. */
%token	T_CHARMAP T_WIDTH

/* Ctype section: header, character classes and case conversions. */
%token	T_CTYPE
	T_ISUPPER T_ISLOWER T_ISALPHA T_ISDIGIT T_ISPUNCT T_ISXDIGIT
	T_ISSPACE T_ISPRINT T_ISGRAPH T_ISBLANK T_ISCNTRL T_ISALNUM
	T_ISSPECIAL T_ISPHONOGRAM T_ISIDEOGRAM T_ISENGLISH T_ISNUMBER
	T_TOUPPER T_TOLOWER

/* Collate section. */
%token	T_COLLATE T_COLLATING_SYMBOL T_COLLATING_ELEMENT
	T_ORDER_START T_ORDER_END
	T_FORWARD T_BACKWARD T_POSITION
	T_FROM T_UNDEFINED T_IGNORE

/* Messages section. */
%token	T_MESSAGES T_YESSTR T_NOSTR T_YESEXPR T_NOEXPR

/* Monetary section. */
%token	T_MONETARY
	T_INT_CURR_SYMBOL T_CURRENCY_SYMBOL
	T_MON_DECIMAL_POINT T_MON_THOUSANDS_SEP
	T_POSITIVE_SIGN T_NEGATIVE_SIGN
	T_MON_GROUPING
	T_INT_FRAC_DIGITS T_FRAC_DIGITS
	T_P_CS_PRECEDES T_P_SEP_BY_SPACE
	T_N_CS_PRECEDES T_N_SEP_BY_SPACE
	T_P_SIGN_POSN T_N_SIGN_POSN
	T_INT_P_CS_PRECEDES T_INT_N_CS_PRECEDES
	T_INT_P_SEP_BY_SPACE T_INT_N_SEP_BY_SPACE
	T_INT_P_SIGN_POSN T_INT_N_SIGN_POSN

/* Numeric section header. */
%token	T_NUMERIC
%token		T_DECIMAL_POINT
%token		T_THOUSANDS_SEP
%token		T_GROUPING
%token		T_TIME
%token		T_ABDAY
%token		T_DAY
%token		T_ABMON
%token		T_MON
%token		T_ERA
%token		T_ERA_D_FMT
%token		T_ERA_T_FMT
%token		T_ERA_D_T_FMT
%token		T_ALT_DIGITS
%token		T_D_T_FMT
%token		T_D_FMT
%token		T_T_FMT
%token		T_AM_PM
%token		T_T_FMT_AMPM
%token		T_DATE_FMT
%token	<wc>		T_CHAR
%token	<token>		T_NAME
%token	<num>		T_NUMBER
%token	<token>		T_SYMBOL
%token	<collsym>	T_COLLSYM
%token	<collelem>	T_COLLELEM

%%

/*
 * Start symbol: an optional list of global settings followed by one or
 * more category sections.
 */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string.  Its characters are accumulated one at a time with
 * add_wcs() (see charlist); rules that consume a string fetch the
 * accumulated text with get_wcs() in their own action.
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

setting_list	: setting_list setting
		| setting
		;


/*
 * Global settings: comment character, escape character, multibyte limits
 * and the code set (given either as a quoted string or as a bare name).
 */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/*
 * "copy" directive, usable as the sole body of a category section.  The
 * source may be a bare name or a quoted string.
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

categories	: categories category
		| category
		;


category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;


/* Charmap and width tables share the "charmap" category. */
charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
		;


charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;


/*
 * A charmap entry is not terminated by T_NL in the grammar; the action
 * calls scan_to_eol() after recording the mapping instead.
 */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

width_list	: width_list width_entry
		| width_entry
		;

/*
 * Width entries.  An endpoint that arrives as T_SYMBOL instead of T_CHAR
 * is reported through add_charmap_undefined(); any T_CHAR endpoint in the
 * same entry still gets its width recorded.
 */
width_entry	: T_CHAR T_NUMBER T_NL
		{
			add_width($1, $2);
		}
		| T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
		}
		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width_range($1, $3, $4);
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
		| T_NL
		;

/* LC_CTYPE: either a list of class/conversion keywords or a copy. */
ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;

/* Semicolon-separated characters, optionally with "...;<char>" ranges. */
cc_list		: cc_list T_SEMI cc_range_end
		| cc_list T_SEMI cc_char
		| cc_char
		;

cc_range_end	: T_ELLIPSIS T_SEMI T_CHAR
		{
			add_ctype_range($3);
		}
		;

cc_char		: T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;


/*
 * A "(from,to)" case-conversion pair.  The conversion is recorded only
 * when both sides are T_CHAR; every T_SYMBOL side is reported through
 * add_charmap_undefined().
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

/*
 * LC_COLLATE: optional collating-symbol/element definitions, then the
 * order block; or a copy.
 */
collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;


coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;


coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;


coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

/*
 * NOTE(review): the default-directive action below is an end-of-rule
 * action, so it runs only after the whole order_list has been reduced.
 * Confirm that registering the default directive after the items have
 * been processed is what the collation code expects.
 */
coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/* If no order list supplied default to one forward */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;


/*
 * Directives are separated by ';'.  Within one directive, ','-separated
 * keywords each contribute a bit via add_order_bit(); the directive is
 * committed with add_order_directive().
 */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

order_list	: order_list order_item
		| order_item
		;

/* One line of the order block: a key with optional weights. */
order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/* ';'-separated weights; a trailing ';' with no weight is accepted. */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

/* Quoted substitution weight: built item by item, then committed. */
order_str	: order_str order_stritem
		| order_stritem
		;

order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

/* LC_MESSAGES. */
messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

messages_list	: messages_list messages_item
		| messages_item
		;

messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

/* LC_MONETARY. */
monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

/* Monetary keywords whose value is a quoted string. */
monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

/* Monetary keywords whose value is a single number. */
monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/* The first number resets the group list; later ones are appended. */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;


/* LC_NUMERIC. */
numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;


numeric_list	: numeric_list numeric_item
		| numeric_item
		;


numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;


/* The first number resets the group list; later ones are appended. */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;


/*
 * LC_TIME.  The copy form is closed by "END LC_TIME" (T_END T_TIME),
 * matching how every other category closes its own copy form.
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

time_kwlist	: time_kwlist time_kw
		| time_kw
		;

time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Time keywords whose value is a ';'-separated list of strings. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Time keywords whose value is a single string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/* The first string resets the list; later ones are appended. */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;