%{
/*-
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Grammar for POSIX localedef input.
 */

#include <wchar.h>
#include <stdio.h>
#include <limits.h>
#include "localedef.h"

%}

/* Semantic value carried by typed tokens. */
%union {
	int		num;		/* <num> tokens */
	wchar_t		wc;		/* <wc> tokens */
	char		*token;		/* <token> tokens */
	collsym_t	*collsym;	/* <collsym> tokens */
	collelem_t	*collelem;	/* <collelem> tokens */
}

/*
 * Untyped tokens.  Several are declared per line, but the declaration
 * order is kept exactly as before so that the token numbers yacc assigns
 * do not change.
 */

/* Keywords consumed by the "setting" rule. */
%token T_CODE_SET T_MB_CUR_MAX T_MB_CUR_MIN T_COM_CHAR T_ESC_CHAR

/* Punctuation and other structural tokens. */
%token T_LT T_GT T_NL T_SEMI T_COMMA T_ELLIPSIS T_RPAREN T_LPAREN
%token T_QUOTE T_NULL T_WS

/* Section terminator and the copy directive used by "copycat". */
%token T_END T_COPY

/* Section keywords of the "charmap" rule. */
%token T_CHARMAP T_WIDTH

/* ctype section and its keywords. */
%token T_CTYPE
%token T_ISUPPER T_ISLOWER T_ISALPHA T_ISDIGIT T_ISPUNCT T_ISXDIGIT
%token T_ISSPACE T_ISPRINT T_ISGRAPH T_ISBLANK T_ISCNTRL T_ISALNUM
%token T_ISSPECIAL T_ISPHONOGRAM T_ISIDEOGRAM T_ISENGLISH T_ISNUMBER
%token T_TOUPPER T_TOLOWER

/* collate section and its keywords. */
%token T_COLLATE
%token T_COLLATING_SYMBOL T_COLLATING_ELEMENT
%token T_ORDER_START T_ORDER_END
%token T_FORWARD T_BACKWARD T_POSITION
%token T_FROM T_UNDEFINED T_IGNORE

/* messages section and its keywords. */
%token T_MESSAGES
%token T_YESSTR T_NOSTR T_YESEXPR T_NOEXPR

/* monetary section and its keywords. */
%token T_MONETARY
%token T_INT_CURR_SYMBOL T_CURRENCY_SYMBOL
%token T_MON_DECIMAL_POINT T_MON_THOUSANDS_SEP
%token T_POSITIVE_SIGN T_NEGATIVE_SIGN
%token T_MON_GROUPING
%token T_INT_FRAC_DIGITS T_FRAC_DIGITS
%token T_P_CS_PRECEDES T_P_SEP_BY_SPACE
%token T_N_CS_PRECEDES T_N_SEP_BY_SPACE
%token T_P_SIGN_POSN T_N_SIGN_POSN
%token T_INT_P_CS_PRECEDES T_INT_N_CS_PRECEDES
%token T_INT_P_SEP_BY_SPACE T_INT_N_SEP_BY_SPACE
%token T_INT_P_SIGN_POSN T_INT_N_SIGN_POSN

/* numeric section keyword. */
%token T_NUMERIC
/* Keywords used inside the numeric section. */
%token T_DECIMAL_POINT T_THOUSANDS_SEP T_GROUPING

/* time section and its keywords. */
%token T_TIME
%token T_ABDAY T_DAY T_ABMON T_MON
%token T_ERA T_ERA_D_FMT T_ERA_T_FMT T_ERA_D_T_FMT
%token T_ALT_DIGITS
%token T_D_T_FMT T_D_FMT T_T_FMT
%token T_AM_PM T_T_FMT_AMPM T_DATE_FMT

/* Tokens that carry a semantic value (see %union). */
%token <wc>		T_CHAR
%token <token>		T_NAME
%token <num>		T_NUMBER
%token <token>		T_SYMBOL
%token <collsym>	T_COLLSYM
%token <collelem>	T_COLLELEM

%%

/*
 * Start symbol: an optional run of global settings followed by one or
 * more category sections.
 */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string.  Each character is handed to add_wcs() as it is
 * reduced; rules that use "string" fetch the result with get_wcs().
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

setting_list	: setting_list setting
		| setting
		;

/*
 * Global settings.  The code set may be given either as a bare name or
 * as a quoted string; the quoted form is converted with to_mb_string()
 * and the wide buffer returned by get_wcs() is freed here.
 */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/*
 * The copy directive, usable as the whole body of a category.  The
 * source may be a bare name or a quoted string.
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

categories	: categories category
		| category
		;

category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;

/*
 * Character map input: either a T_CHARMAP section of symbol-to-character
 * entries or a T_WIDTH section of character widths.
 */
charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
		;

charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;

/*
 * These entries do not end in T_NL; scan_to_eol() is called from the
 * action instead (presumably to discard the rest of the line -- confirm
 * against the scanner).
 */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

width_list	: width_list width_entry
		| width_entry
		;

/*
 * Width entries.  Any endpoint that arrives as T_SYMBOL rather than
 * T_CHAR is passed to add_charmap_undefined(); a width is recorded only
 * for endpoints that arrive as T_CHAR.
 */
width_entry	: T_CHAR T_NUMBER T_NL
		{
			add_width($1, $2);
		}
		| T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
		}
		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width_range($1, $3, $4);
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
		| T_NL
		;

/*
 * ctype category.  dump_ctype() runs only for an explicit definition,
 * not for the copy form.
 */
ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

/*
 * Class keywords take a list of characters; the two case-conversion
 * keywords take a list of parenthesised pairs.
 */
ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;

cc_list		: cc_list T_SEMI cc_range_end
		| cc_list T_SEMI cc_char
		| cc_char
		;

/* Tail of a "first;...;last" range; only the end point is passed on. */
cc_range_end	: T_ELLIPSIS T_SEMI T_CHAR
		{
			add_ctype_range($3);
		}
		;

cc_char		: T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;

/*
 * A "(from,to)" case-conversion pair.  The conversion is recorded only
 * when both sides arrive as T_CHAR; any T_SYMBOL side is passed to
 * add_charmap_undefined() and the pair is otherwise dropped.
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

/*
 * collate category: optional collating-symbol / collating-element
 * definitions, then the order block.  dump_collate() runs only for an
 * explicit definition, not for the copy form.
 */
collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;

coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;

coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;

coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/*
			 * No order_args were given on the order_start line,
			 * so default to a single forward directive.
			 */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;

/*
 * Arguments of order_start: directives separated by T_SEMI, each made
 * of one or more comma-separated direction keywords.  One directive is
 * emitted per order_arg.
 */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

order_list	: order_list order_item
		| order_item
		;

/*
 * One line of the order block: either a lone collating symbol, or an
 * item keyword optionally followed by its weights.
 */
order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/* Weights are separated by T_SEMI; a trailing T_SEMI is accepted. */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

/* Contents of a quoted weight; items are collected one at a time. */
order_str	: order_str order_stritem
		| order_stritem
		;

order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

/*
 * messages category.  dump_messages() runs only for an explicit
 * definition, not for the copy form.
 */
messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

messages_list	: messages_list messages_item
		| messages_item
		;

messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

/*
 * monetary category.  dump_monetary() runs only for an explicit
 * definition, not for the copy form.
 */
monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

/* Monetary keywords whose value is a quoted string. */
monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

/* Monetary keywords whose value is a single number. */
monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/* The first number resets the group list; later ones append to it. */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;

/*
 * numeric category.  dump_numeric() runs only for an explicit
 * definition, not for the copy form.
 */
numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;

numeric_list	: numeric_list numeric_item
		| numeric_item
		;

numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;

/* The first number resets the group list; later ones append to it. */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;

/*
 * time category.  dump_time() runs only for an explicit definition, not
 * for the copy form.  The copy form closes with T_END T_TIME, i.e. with
 * the same keyword that opened the section, exactly as the copy forms of
 * the other categories do.
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

time_kwlist	: time_kwlist time_kw
		| time_kw
		;

time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Time keywords whose value is a T_SEMI-separated list of strings. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Time keywords whose value is a single string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/* The first string resets the list; later ones append to it. */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;