%{
/*
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * POSIX localedef grammar.
 */

#include <wchar.h>
#include <stdio.h>
#include <limits.h>
#include "localedef.h"

%}

/*
 * Semantic value carried by typed tokens; the member used by each token is
 * selected by the <tag> on its %token declaration.
 */
%union {
	int		num;		/* <num> tokens */
	wchar_t		wc;		/* <wc> tokens */
	char		*token;		/* <token> tokens */
	collsym_t	*collsym;	/* <collsym> tokens */
	collelem_t	*collelem;	/* <collelem> tokens */
}

/*
 * Untyped tokens, grouped by the part of the grammar that uses them.
 * The declaration order is kept exactly as it was, since yacc numbers
 * tokens in the order in which they are declared.
 */

/* Global settings. */
%token	T_CODE_SET T_MB_CUR_MAX T_MB_CUR_MIN T_COM_CHAR T_ESC_CHAR

/* Punctuation and block structure. */
%token	T_LT T_GT T_NL T_SEMI T_COMMA T_ELLIPSIS T_RPAREN T_LPAREN
%token	T_QUOTE T_NULL T_WS T_END T_COPY

/* charmap */
%token	T_CHARMAP T_WIDTH

/* ctype */
%token	T_CTYPE
%token	T_ISUPPER T_ISLOWER T_ISALPHA T_ISDIGIT T_ISPUNCT T_ISXDIGIT
%token	T_ISSPACE T_ISPRINT T_ISGRAPH T_ISBLANK T_ISCNTRL T_ISALNUM
%token	T_ISSPECIAL T_ISPHONOGRAM T_ISIDEOGRAM T_ISENGLISH T_ISNUMBER
%token	T_TOUPPER T_TOLOWER

/* collate */
%token	T_COLLATE T_COLLATING_SYMBOL T_COLLATING_ELEMENT
%token	T_ORDER_START T_ORDER_END T_FORWARD T_BACKWARD T_POSITION
%token	T_FROM T_UNDEFINED T_IGNORE

/* messages */
%token	T_MESSAGES T_YESSTR T_NOSTR T_YESEXPR T_NOEXPR

/* monetary */
%token	T_MONETARY
%token	T_INT_CURR_SYMBOL T_CURRENCY_SYMBOL
%token	T_MON_DECIMAL_POINT T_MON_THOUSANDS_SEP
%token	T_POSITIVE_SIGN T_NEGATIVE_SIGN T_MON_GROUPING
%token	T_INT_FRAC_DIGITS T_FRAC_DIGITS
%token	T_P_CS_PRECEDES T_P_SEP_BY_SPACE T_N_CS_PRECEDES T_N_SEP_BY_SPACE
%token	T_P_SIGN_POSN T_N_SIGN_POSN
%token	T_INT_P_CS_PRECEDES T_INT_N_CS_PRECEDES
%token	T_INT_P_SEP_BY_SPACE T_INT_N_SEP_BY_SPACE
%token	T_INT_P_SIGN_POSN T_INT_N_SIGN_POSN

/* numeric */
%token	T_NUMERIC
%token		T_DECIMAL_POINT
%token		T_THOUSANDS_SEP
%token		T_GROUPING
%token		T_TIME
%token		T_ABDAY
%token		T_DAY
%token		T_ABMON
%token		T_MON
%token		T_ERA
%token		T_ERA_D_FMT
%token		T_ERA_T_FMT
%token		T_ERA_D_T_FMT
%token		T_ALT_DIGITS
%token		T_D_T_FMT
%token		T_D_FMT
%token		T_T_FMT
%token		T_AM_PM
%token		T_T_FMT_AMPM
%token		T_DATE_FMT
%token	<wc>		T_CHAR
%token	<token>		T_NAME
%token	<num>		T_NUMBER
%token	<token>		T_SYMBOL
%token	<collsym>	T_COLLSYM
%token	<collelem>	T_COLLELEM

%%

/* Start symbol: optional global settings followed by one or more categories. */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string.  Its characters are accumulated with add_wcs(); rules
 * that consume a string fetch the accumulated text with get_wcs().
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

setting_list	: setting_list setting
		| setting
		;

/* Global settings: comment/escape characters, mb_cur_{max,min}, code set. */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/* "copy" directive; the source may be given as a bare name or a string. */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

categories	: categories category
		| category
		;

category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;

/* Character map and width sections. */
charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
		;

charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;

/* After the mapping is recorded the rest of the input line is skipped. */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

width_list	: width_list width_entry
		| width_entry
		;

/*
 * A width entry names one character or a range.  Any endpoint that is
 * still a T_SYMBOL is passed to add_charmap_undefined().
 */
width_entry	: T_CHAR T_NUMBER T_NL
		{
			add_width($1, $2);
		}
		| T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
		}
		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width_range($1, $3, $4);
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
		| T_NL
		;

/* ctype category. */
ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;

/* Semicolon-separated character class members. */
cc_list		: cc_list T_SEMI T_CHAR
		{
			add_ctype($3);
		}
		| cc_list T_SEMI T_SYMBOL
		{
			add_charmap_undefined($3);
		}
		| cc_list T_SEMI T_ELLIPSIS T_SEMI T_CHAR
		{
			/* note that the endpoints *must* be characters */
			add_ctype_range($5);
		}
		| T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;

/*
 * A "(from,to)" case conversion pair.  The conversion is recorded only
 * when both sides are characters; a T_SYMBOL on either side is passed to
 * add_charmap_undefined() instead.
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

/* collate category. */
collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;

coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;

coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;

coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/*
			 * No order_args were given after order_start:
			 * default to a single forward directive.
			 *
			 * NOTE(review): as an end-of-rule action this runs
			 * only after order_list has been reduced; confirm
			 * that the collation code does not need the
			 * directive before the order items are processed.
			 */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;

/* One directive is added per semicolon-separated order_arg. */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

order_list	: order_list order_item
		| order_item
		;

order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/* Semicolon-separated weights; a trailing semicolon is accepted. */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

order_str	: order_str order_stritem
		| order_stritem
		;

order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

/* messages category. */
messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

messages_list	: messages_list messages_item
		| messages_item
		;

messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

/* monetary category. */
monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/* The first number resets the group list; later ones are appended. */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;

/* numeric category. */
numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;

numeric_list	: numeric_list numeric_item
		| numeric_item
		;

numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;

/* The first number resets the group list; later ones are appended. */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;

/*
 * time category.  Like every other category, the "copy" form is closed by
 * T_END followed by the category's own keyword (T_TIME).
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

time_kwlist	: time_kwlist time_kw
		| time_kw
		;

time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Keywords whose value is a semicolon-separated list of strings. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Keywords whose value is a single string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/* The first string resets the list; later ones are appended. */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;