%{
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms version 1.0
 * of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * POSIX localedef grammar.
 *
 * The grammar accepts an optional list of settings followed by one or
 * more categories (charmap, messages, monetary, ctype, collate, numeric,
 * time).  Every category other than charmap comes in two forms: a list
 * of keyword lines, after which the matching dump_*() routine is called,
 * or a single "copy" directive handled by copy_category().  Both forms
 * are closed by T_END followed by the category's own token.
 */

#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>	/* free() */
#include <limits.h>
#include "localedef.h"

%}
/* Semantic value carried by the typed tokens declared below. */
%union {
	int		num;
	wchar_t		wc;
	char		*token;
	collsym_t	*collsym;
	collelem_t	*collelem;
}

%token		T_CODE_SET
%token		T_MB_CUR_MAX
%token		T_MB_CUR_MIN
%token		T_COM_CHAR
%token		T_ESC_CHAR
%token		T_LT
%token		T_GT
%token		T_NL
%token		T_SEMI
%token		T_COMMA
%token		T_ELLIPSIS
%token		T_RPAREN
%token		T_LPAREN
%token		T_QUOTE
%token		T_NULL
%token		T_WS
%token		T_END
%token		T_COPY
%token		T_CHARMAP
%token		T_WIDTH
%token		T_WIDTH_DEFAULT
%token		T_CTYPE
%token		T_ISUPPER
%token		T_ISLOWER
%token		T_ISALPHA
%token		T_ISDIGIT
%token		T_ISPUNCT
%token		T_ISXDIGIT
%token		T_ISSPACE
%token		T_ISPRINT
%token		T_ISGRAPH
%token		T_ISBLANK
%token		T_ISCNTRL
%token		T_ISALNUM
%token		T_ISSPECIAL
%token		T_ISPHONOGRAM
%token		T_ISIDEOGRAM
%token		T_ISENGLISH
%token		T_ISNUMBER
%token		T_TOUPPER
%token		T_TOLOWER
%token		T_COLLATE
%token		T_COLLATING_SYMBOL
%token		T_COLLATING_ELEMENT
%token		T_ORDER_START
%token		T_ORDER_END
%token		T_FORWARD
%token		T_BACKWARD
%token		T_POSITION
%token		T_FROM
%token		T_UNDEFINED
%token		T_IGNORE
%token		T_MESSAGES
%token		T_YESSTR
%token		T_NOSTR
%token		T_YESEXPR
%token		T_NOEXPR
%token		T_MONETARY
%token		T_INT_CURR_SYMBOL
%token		T_CURRENCY_SYMBOL
%token		T_MON_DECIMAL_POINT
%token		T_MON_THOUSANDS_SEP
%token		T_POSITIVE_SIGN
%token		T_NEGATIVE_SIGN
%token		T_MON_GROUPING
%token		T_INT_FRAC_DIGITS
%token		T_FRAC_DIGITS
%token		T_P_CS_PRECEDES
%token		T_P_SEP_BY_SPACE
%token		T_N_CS_PRECEDES
%token		T_N_SEP_BY_SPACE
%token		T_P_SIGN_POSN
%token		T_N_SIGN_POSN
%token		T_INT_P_CS_PRECEDES
%token		T_INT_N_CS_PRECEDES
%token		T_INT_P_SEP_BY_SPACE
%token		T_INT_N_SEP_BY_SPACE
%token		T_INT_P_SIGN_POSN
%token		T_INT_N_SIGN_POSN
%token		T_NUMERIC
%token		T_DECIMAL_POINT
%token		T_THOUSANDS_SEP
%token		T_GROUPING
%token		T_TIME
%token		T_ABDAY
%token		T_DAY
%token		T_ABMON
%token		T_MON
%token		T_ERA
%token		T_ERA_D_FMT
%token		T_ERA_T_FMT
%token		T_ERA_D_T_FMT
%token		T_ALT_DIGITS
%token		T_D_T_FMT
%token		T_D_FMT
%token		T_T_FMT
%token		T_AM_PM
%token		T_T_FMT_AMPM
%token		T_DATE_FMT
%token <wc>		T_CHAR
%token <token>		T_NAME
%token <num>		T_NUMBER
%token <token>		T_SYMBOL
%token <collsym>	T_COLLSYM
%token <collelem>	T_COLLELEM

%%

/* Start symbol: optional settings, then at least one category. */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string, possibly empty.  Each character is handed to
 * add_wcs(); rules that consume a string fetch the result with
 * get_wcs().
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

setting_list	: setting_list setting
		| setting
		;


/*
 * Settings: comment character, escape character, multibyte limits and
 * the code set (given either as a quoted string or as a bare name).
 */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			/*
			 * NOTE(review): ownership of the to_mb_string()
			 * result is not visible from this file -- confirm
			 * that it is not leaked.
			 */
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/*
 * "copy" directive; the source may be given as a bare name or as a
 * quoted string.
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			/* NOTE(review): see ownership note under T_CODE_SET. */
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

categories	: categories category
		| category
		;


category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;


charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		;


charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;


/*
 * A charmap entry is a symbol with its character, a symbol range
 * (first, last) with its starting character, or an empty line.  After
 * an entry the remainder of the input line is skipped via scan_to_eol().
 */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

/*
 * Character class keywords take a cc_list; the two case-conversion
 * keywords take a list of (from,to) pairs.
 */
ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;


/*
 * Semicolon separated class members.  A T_SYMBOL appearing here is
 * reported through add_charmap_undefined() rather than added.
 */
cc_list		: cc_list T_SEMI T_CHAR
		{
			add_ctype($3);
		}
		| cc_list T_SEMI T_SYMBOL
		{
			add_charmap_undefined($3);
		}
		| cc_list T_SEMI T_ELLIPSIS T_SEMI T_CHAR
		{
			/* note that the endpoints *must* be characters */
			add_ctype_range($5);
		}
		| T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;


/*
 * Only a pair of two characters yields a case conversion; any T_SYMBOL
 * member is passed to add_charmap_undefined() and the pair is otherwise
 * dropped.
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;


/* Collating symbol / element definitions that may precede the order. */
coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;


coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;


coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/* If no order list supplied default to one forward */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;


/*
 * Semicolon separated directives; add_order_directive() is called once
 * per directive, after its comma separated order_dir bits.
 */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

order_list	: order_list order_item
		| order_item
		;

/*
 * One line of the order list: a lone collating symbol, or an item
 * keyword optionally followed by its weights.
 */
order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/* Semicolon separated weights; a trailing semicolon is accepted. */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

order_str	: order_str order_stritem
		| order_stritem
		;

order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

messages_list	: messages_list messages_item
		| messages_item
		;

messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

/* Monetary keywords whose operand is a quoted string. */
monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

/* Monetary keywords whose operand is a single number. */
monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/* The first number resets the monetary group before being added. */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;


numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;


numeric_list	: numeric_list numeric_item
		| numeric_item
		;


numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;


/* The first number resets the numeric group before being added. */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;


/*
 * The copy form must close with T_END T_TIME, exactly like the keyword
 * form and like the copy form of every other category.
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

time_kwlist	: time_kwlist time_kw
		| time_kw
		;

time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Time keywords whose operand is a semicolon separated string list. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Time keywords whose operand is a single quoted string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/* The first string resets the time list before being added. */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;