%{
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2013 DEY Storage Systems, Inc.
 */

/*
 * POSIX localedef grammar.
 *
 * The grammar accepts an optional list of global settings followed by one
 * or more category sections (charmap/width, LC_CTYPE, LC_COLLATE,
 * LC_MESSAGES, LC_MONETARY, LC_NUMERIC, LC_TIME).  Each section is
 * bracketed by its keyword token and "T_END <same keyword>".  The semantic
 * actions only forward the parsed values to helper routines; those helpers
 * are not defined in this file.
 */

#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>	/* free() is called directly from the actions below */
#include <limits.h>
#include "localedef.h"

%}

/* Semantic value carried by the typed tokens declared at the end of the list. */
%union {
	int		num;
	wchar_t		wc;
	char		*token;
	collsym_t	*collsym;
	collelem_t	*collelem;
}

/*
 * Token declarations.  The order is significant (it fixes the token
 * numbering), so new tokens should be appended rather than inserted.
 */
%token		T_CODE_SET
%token		T_MB_CUR_MAX
%token		T_MB_CUR_MIN
%token		T_COM_CHAR
%token		T_ESC_CHAR
%token		T_LT
%token		T_GT
%token		T_NL
%token		T_SEMI
%token		T_COMMA
%token		T_ELLIPSIS
%token		T_RPAREN
%token		T_LPAREN
%token		T_QUOTE
%token		T_NULL
%token		T_WS
%token		T_END
%token		T_COPY
%token		T_CHARMAP
%token		T_WIDTH
%token		T_CTYPE
%token		T_ISUPPER
%token		T_ISLOWER
%token		T_ISALPHA
%token		T_ISDIGIT
%token		T_ISPUNCT
%token		T_ISXDIGIT
%token		T_ISSPACE
%token		T_ISPRINT
%token		T_ISGRAPH
%token		T_ISBLANK
%token		T_ISCNTRL
%token		T_ISALNUM
%token		T_ISSPECIAL
%token		T_ISPHONOGRAM
%token		T_ISIDEOGRAM
%token		T_ISENGLISH
%token		T_ISNUMBER
%token		T_TOUPPER
%token		T_TOLOWER
%token		T_COLLATE
%token		T_COLLATING_SYMBOL
%token		T_COLLATING_ELEMENT
%token		T_ORDER_START
%token		T_ORDER_END
%token		T_FORWARD
%token		T_BACKWARD
%token		T_POSITION
%token		T_FROM
%token		T_UNDEFINED
%token		T_IGNORE
%token		T_MESSAGES
%token		T_YESSTR
%token		T_NOSTR
%token		T_YESEXPR
%token		T_NOEXPR
%token		T_MONETARY
%token		T_INT_CURR_SYMBOL
%token		T_CURRENCY_SYMBOL
%token		T_MON_DECIMAL_POINT
%token		T_MON_THOUSANDS_SEP
%token		T_POSITIVE_SIGN
%token		T_NEGATIVE_SIGN
%token		T_MON_GROUPING
%token		T_INT_FRAC_DIGITS
%token		T_FRAC_DIGITS
%token		T_P_CS_PRECEDES
%token		T_P_SEP_BY_SPACE
%token		T_N_CS_PRECEDES
%token		T_N_SEP_BY_SPACE
%token		T_P_SIGN_POSN
%token		T_N_SIGN_POSN
%token		T_INT_P_CS_PRECEDES
%token		T_INT_N_CS_PRECEDES
%token		T_INT_P_SEP_BY_SPACE
%token		T_INT_N_SEP_BY_SPACE
%token		T_INT_P_SIGN_POSN
%token		T_INT_N_SIGN_POSN
%token		T_NUMERIC
%token		T_DECIMAL_POINT
%token		T_THOUSANDS_SEP
%token		T_GROUPING
%token		T_TIME
%token		T_ABDAY
%token		T_DAY
%token		T_ABMON
%token		T_MON
%token		T_ERA
%token		T_ERA_D_FMT
%token		T_ERA_T_FMT
%token		T_ERA_D_T_FMT
%token		T_ALT_DIGITS
%token		T_D_T_FMT
%token		T_D_FMT
%token		T_T_FMT
%token		T_AM_PM
%token		T_T_FMT_AMPM
%token		T_DATE_FMT
%token	<wc>		T_CHAR
%token	<token>		T_NAME
%token	<num>		T_NUMBER
%token	<token>		T_SYMBOL
%token	<collsym>	T_COLLSYM
%token	<collelem>	T_COLLELEM

%%

/* Start symbol: optional global settings, then at least one category. */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string.  Each character is accumulated with add_wcs(); the
 * rules that consume a string retrieve the result with get_wcs().  An
 * empty string ("") accumulates nothing.
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

setting_list	: setting_list setting
		| setting
		;

/*
 * Global settings.  The first four store the parsed value straight into
 * the corresponding variable; the code-set forms hand a multibyte name to
 * set_wide_encoding().
 */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			/*
			 * NOTE(review): the result of to_mb_string() is not
			 * released here; confirm who owns it.
			 */
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/*
 * "copy <name>" inside a category; the name may be bare or quoted.  Used
 * as the sole body of a category in place of its keyword list.
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			/*
			 * NOTE(review): the result of to_mb_string() is not
			 * released here; confirm who owns it.
			 */
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

categories	: categories category
		| category
		;


category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;

/* Character map and width sections. */
charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
		;


charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;

/*
 * One charmap line: a symbol (or symbol range) and its encoding.  No T_NL
 * is consumed by the grammar for these entries; scan_to_eol() is called
 * instead (presumably to skip the remainder of the line -- confirm in the
 * scanner).
 */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

width_list	: width_list width_entry
		| width_entry
		;

/*
 * One width line.  Operands that arrive as T_SYMBOL rather than T_CHAR
 * are reported through add_charmap_undefined() and the width is not
 * recorded for them.
 */
width_entry	: T_CHAR T_NUMBER T_NL
		{
			add_width($1, $2);
		}
		| T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
		}
		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width_range($1, $3, $4);
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
		| T_NL
		;

/*
 * LC_CTYPE.  dump_ctype() runs only for an explicit keyword list; the
 * "copy" form has no action of its own beyond copycat's.
 */
ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

/*
 * The class keyword itself has no action here; the members are recorded
 * by the cc_list / conv_list actions.  NOTE(review): the current class is
 * presumably tracked outside the grammar -- confirm.
 */
ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;

/* Semicolon-separated class members; "...;<char>" closes a range. */
cc_list		: cc_list T_SEMI cc_range_end
		| cc_list T_SEMI cc_char
		| cc_char
		;

cc_range_end	: T_ELLIPSIS T_SEMI T_CHAR
		{
			add_ctype_range($3);
		}
		;

cc_char		: T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;

/*
 * A "(from,to)" case-conversion pair.  The mapping is recorded only when
 * both sides are T_CHAR; any T_SYMBOL operand is reported through
 * add_charmap_undefined() and the pair is otherwise dropped.
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

/*
 * LC_COLLATE: optional collating-symbol / collating-element definitions,
 * then the order block.  dump_collate() runs for both explicit forms.
 */
collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;


coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;


coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;


coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

/*
 * The order block.  When order_start carries no directives, a single
 * forward directive is registered.  NOTE(review): that action is attached
 * to the end of the rule, so it runs after order_list has been reduced;
 * confirm the collation code tolerates the directive arriving that late.
 */
coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/* No directives were supplied: default to one forward */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;

/*
 * Directives are separated by ';'; each directive is a ','-separated set
 * of direction bits.  add_order_directive() is called once per directive,
 * after its bits have been added.
 */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

order_list	: order_list order_item
		| order_item
		;

/*
 * One line of the order block: either a bare collating symbol, or an
 * item keyword optionally followed by its weights.
 */
order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

/* The entity being ordered; each form calls its own start_order_*(). */
order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/* ';'-separated weights; a trailing ';' with no weight is accepted. */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

/*
 * A quoted weight string.  Its items are fed to the add_subst_*() helpers
 * rather than add_wcs(), and the closing quote triggers add_order_subst().
 */
order_str	: order_str order_stritem
		| order_stritem
		;

order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

/* LC_MESSAGES. */
messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

messages_list	: messages_list messages_item
		| messages_item
		;

/*
 * The keyword has no action in the grammar; only the string value is
 * passed on.  NOTE(review): the keyword is presumably remembered outside
 * the grammar -- confirm.
 */
messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

/* LC_MONETARY. */
monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

/* Monetary keywords that take a quoted string. */
monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

/* Monetary keywords that take a single number. */
monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/*
 * ';'-separated grouping values.  The first value resets the list before
 * being added, so a repeated keyword starts from scratch.
 */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;

/* LC_NUMERIC. */
numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;


numeric_list	: numeric_list numeric_item
		| numeric_item
		;


numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;

/* Same shape as mon_group_list: the first value resets the list. */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;

/*
 * LC_TIME.  Both forms close with the same T_TIME keyword that opens the
 * section, matching every other category.  (The copy form previously
 * required T_NUMERIC after T_END, which rejected a correctly terminated
 * section.)
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

time_kwlist	: time_kwlist time_kw
		| time_kw
		;

/*
 * A time keyword takes either one string or a ';'-separated list of
 * strings; check_time_list() runs once the whole list has been read.
 */
time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Time keywords whose value is a list of strings. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Time keywords whose value is a single string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/* The first string of a list resets the list before being added. */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;