1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "options.h" 28 29 /* 30 * options 31 * 32 * Overview 33 * sort(1) supports two methods for specifying the sort key: the original, 34 * now-obsolete, +n -m form and the POSIX -k n,m form. We refer to the former 35 * as "old specifiers" and the latter as "new specifiers". The options() 36 * function parses the command line arguments given to sort, placing the sort 37 * key specifiers in the internal representation used in fields.c. 38 * 39 * Equivalence of specifiers 40 * One of sort(1)'s standard peculiarities is the transformation of the 41 * character offsets and field numbering between the new and old style field 42 * specifications. We simply quote from the Single Unix standard: 43 * 44 * +w.xT -y.zU 45 * 46 * is equivalent to 47 * 48 * undefined when z == 0, U contains b, and -t is set 49 * -k w+1.x+1T,y.0U when z == 0 otherwise 50 * -k w+1.x+1T,y+1.zU when z > 0 51 * 52 * Undoubtedly, this seemed logical at the time. (Using only the field head 53 * as the coordinate, as done in the obsolete version, seems much simpler.) 54 * The reverse map is where the key specifier 55 * 56 * -k w.xT,y.zU 57 * 58 * is equivalent to 59 * 60 * undefined when z == 0, U contains b, and -t is set 61 * +w-1.x-1T,y.0U when z == 0 otherwise 62 * +w-1.x-1T,y-1.z when z > 0 63 * 64 * in the obsolete syntax. Because the original key specifiers lead to a 65 * simpler implementation, the internal representation of a field in this 66 * implementation of sort is mostly that given by the obsolete syntax. 67 */ 68 69 /* 70 * While a key specifier in the obsolete +m ... -n form is being defined (that 71 * is, before the closing -n is seen), a narrower set of options is permitted. 72 * We specify this smaller set of options in OLD_SPEC_OPTIONS_STRING. 73 */ 74 #define OPTIONS_STRING "cmuo:T:z:dfiMnrbt:k:S:0123456789" 75 #define OLD_SPEC_OPTIONS_STRING "bdfiMnrcmuo:T:z:t:k:S:" 76 77 #define OPTIONS_OLDSPEC 0x1 /* else new-style spec */ 78 #define OPTIONS_STARTSPEC 0x2 /* else end spec */ 79 80 static int 81 is_number(char *C) 82 { 83 size_t i; 84 85 for (i = 0; i < strlen(C); i++) 86 if (!isdigit((uchar_t)C[i])) 87 return (0); 88 89 return (1); 90 } 91 92 /* 93 * If a field specified by the -k option or by the +n syntax contains any 94 * modifiers, then the current global field modifiers are not inherited. 95 */ 96 static int 97 field_spec_has_modifiers(char *C, int length) 98 { 99 int p_nonmodifiers = strspn(C, ",.1234567890"); 100 101 if (p_nonmodifiers == length) 102 return (0); 103 104 return (1); 105 } 106 107 static void 108 field_apply_all(field_t *fc, flag_t flags) 109 { 110 field_t *f; 111 112 for (f = fc; f; f = f->f_next) 113 if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0) 114 f->f_options |= flags; 115 } 116 117 static int 118 parse_field_spec(field_t *F, char *C, int flags, int length) 119 { 120 int p_period = MIN(length, strcspn(C, ".")); 121 int p_modifiers = MIN(length, strspn(C, ".1234567890")); 122 int p_boundary = MIN(p_period, p_modifiers); 123 int field = 0; 124 int offset = 0; 125 int offset_seen = 0; 126 int i; 127 int blanks_flag = 0; 128 129 for (i = 0; i < p_boundary; i++) { 130 if (isdigit((uchar_t)C[i])) 131 field = (10 * field) + (C[i] - '0'); 132 else 133 return (1); 134 } 135 136 if (p_period < p_modifiers) { 137 for (i = p_period + 1; i < p_modifiers; i++) { 138 if (isdigit((uchar_t)C[i])) { 139 offset_seen++; 140 offset = (10 * offset) + (C[i] - '0'); 141 } else { 142 return (1); 143 } 144 } 145 } 146 147 if (p_modifiers < length) { 148 for (i = p_modifiers; i < length; i++) { 149 switch (C[i]) { 150 case 'b': 151 blanks_flag = 1; 152 break; 153 case 'd': 154 F->f_options |= FIELD_DICTIONARY_ORDER; 155 break; 156 case 'f': 157 F->f_options |= FIELD_FOLD_UPPERCASE; 158 break; 159 case 'i': 160 F->f_options |= 161 FIELD_IGNORE_NONPRINTABLES; 162 break; 163 case 'M': 164 F->f_species = MONTH; 165 break; 166 case 'n': 167 F->f_species = NUMERIC; 168 break; 169 case 'r': 170 F->f_options |= 171 FIELD_REVERSE_COMPARISONS; 172 break; 173 default: 174 usage(); 175 break; 176 } 177 } 178 } 179 180 if (flags & OPTIONS_STARTSPEC) { 181 F->f_start_field = field; 182 F->f_start_offset = offset; 183 if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) { 184 F->f_start_field--; 185 if (offset_seen) 186 F->f_start_offset--; 187 } 188 F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0; 189 } else { 190 F->f_end_field = field; 191 F->f_end_offset = offset; 192 if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC && 193 offset_seen && offset != 0) 194 F->f_end_field--; 195 F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0; 196 } 197 198 return (0); 199 } 200 201 static void 202 parse_new_field_spec(sort_t *S, char *arg) 203 { 204 int length = strlen(arg); 205 int p_comma = MIN(length, strcspn(arg, ",")); 206 field_t *nF; 207 int p; 208 209 /* 210 * New field specifiers do not inherit from the general specifier if 211 * they have any modifiers set. (This is specifically tested in the VSC 212 * test suite, assertion 32 for POSIX.cmd/sort.) 213 */ 214 if (field_spec_has_modifiers(arg, length)) { 215 nF = field_new(NULL); 216 nF->f_options = FIELD_MODIFIERS_DEFINED; 217 } else { 218 nF = field_new(S); 219 } 220 p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma); 221 222 if (p != 0) 223 usage(); 224 225 if (p_comma < length) { 226 p = parse_field_spec(nF, &(arg[p_comma + 1]), 0, 227 strlen(&(arg[p_comma + 1]))); 228 if (p != 0) 229 usage(); 230 } 231 232 if (nF->f_start_field < 0 || nF->f_start_offset < 0) { 233 if (S->m_verbose) 234 warn("-k %s is not a supported field specifier\n", arg); 235 } 236 nF->f_start_field = MAX(nF->f_start_field, 0); 237 nF->f_start_offset = MAX(nF->f_start_offset, 0); 238 239 /* 240 * If the starting field exceeds a defined ending field, convention 241 * dictates that the field is ignored. 242 */ 243 if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field || 244 (nF->f_start_field == nF->f_end_field && 245 nF->f_start_offset < nF->f_end_offset)) { 246 field_add_to_chain(&(S->m_fields_head), nF); 247 } else if (S->m_verbose) { 248 warn("illegal field -k %s omitted", arg); 249 } 250 } 251 252 /* 253 * parse_old_field_spec() is getopt()-aware; it may modify the values of optind, 254 * optarg, and so forth, to correctly determine the characteristics being 255 * assigned to the current field. 256 */ 257 static int 258 parse_old_field_spec(sort_t *S, int argc, char *argv[]) 259 { 260 field_t *nF; 261 int c, p; 262 char *arg = argv[optind]; 263 264 if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) { 265 nF = field_new(NULL); 266 nF->f_options = FIELD_MODIFIERS_DEFINED; 267 } else { 268 nF = field_new(S); 269 } 270 271 p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC, 272 strlen(arg + 1)); 273 274 if (p != 0) { 275 field_delete(nF); 276 return (0); 277 } 278 279 /* 280 * In the case that getopt() returns '?' (unrecognized option) or EOF 281 * (non-option argument), the field is considered closed. 282 */ 283 for (arg = argv[++optind]; optind < argc; arg = argv[optind]) { 284 if (strlen(arg) >= 2 && *arg == '-' && 285 isdigit(*(uchar_t *)(arg + 1))) { 286 (void) parse_field_spec(nF, arg + 1, 287 OPTIONS_OLDSPEC, strlen(arg) - 1); 288 field_add_to_chain(&(S->m_fields_head), nF); 289 optind++; 290 return (1); 291 } 292 293 if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) { 294 switch (c) { 295 case 'b': 296 nF->f_options |= FIELD_IGNORE_BLANKS_START; 297 break; 298 case 'd': 299 nF->f_options |= FIELD_DICTIONARY_ORDER; 300 break; 301 case 'f': 302 nF->f_options |= FIELD_FOLD_UPPERCASE; 303 break; 304 case 'i': 305 nF->f_options |= FIELD_IGNORE_NONPRINTABLES; 306 break; 307 case 'M': 308 nF->f_species = MONTH; 309 break; 310 case 'n': 311 nF->f_species = NUMERIC; 312 break; 313 case 'r': 314 nF->f_options |= FIELD_REVERSE_COMPARISONS; 315 break; 316 case '?': 317 case 'c': 318 case 'm': 319 case 'u': 320 /* 321 * Options without arguments. 322 */ 323 optind -= 1; 324 field_add_to_chain(&(S->m_fields_head), nF); 325 return (1); 326 /*NOTREACHED*/ 327 case 'o': 328 case 'T': 329 case 'z': 330 case 't': 331 case 'k': 332 case 'S': 333 /* 334 * Options with arguments. 335 */ 336 if (optarg == argv[optind - 1] + 2) { 337 optind -= 1; 338 } else { 339 optind -= 2; 340 } 341 field_add_to_chain(&(S->m_fields_head), nF); 342 return (1); 343 /*NOTREACHED*/ 344 default: 345 die(EMSG_UNKN_OPTION); 346 /*NOTREACHED*/ 347 } 348 } else { 349 break; 350 } 351 } 352 353 field_add_to_chain(&(S->m_fields_head), nF); 354 return (1); 355 } 356 357 int 358 options(sort_t *S, int argc, char *argv[]) 359 { 360 int c; 361 362 optind = 1; 363 while (optind < argc) { 364 if (strncmp("-y", argv[optind], strlen("-y")) == 0) { 365 /* 366 * The -y [kmem] option violates the standard syntax 367 * outlined in intro(1). we have to be a little fancy 368 * to determine if the next argument is a valid integer. 369 * (note, of course, that the previous sort(1) had no 370 * mechanism to resolve a final 371 * -y 99999 372 * into 373 * -y, file 99999 374 * or 375 * -y 99999, file stdin 376 * 377 * Now one can unambiguously use 378 * -y -- 99999 379 * and 380 * -y 99999 - 381 * to distinguish these cases. 382 * 383 * That said, we do not use the information passed using 384 * -y option in sort(1); we provide the argument to 385 * preserve compatibility for existing scripts. 386 */ 387 if (strlen(argv[optind]) == strlen("-y") && 388 optind + 1 < argc && 389 is_number(argv[optind + 1])) 390 optind += 2; 391 else 392 optind += 1; 393 } 394 395 if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) { 396 switch (c) { 397 case 'c': 398 S->m_check_if_sorted_only = 1; 399 break; 400 401 case 'm': 402 S->m_merge_only = 1; 403 break; 404 405 case 'u': 406 S->m_unique_lines = 1; 407 break; 408 409 case 'o': 410 S->m_output_filename = optarg; 411 break; 412 413 case 'T': 414 S->m_tmpdir_template = optarg; 415 break; 416 417 case 'z': 418 /* 419 * ignore optarg -- obsolete 420 */ 421 break; 422 423 case 'd': 424 S->m_field_options |= FIELD_DICTIONARY_ORDER; 425 field_apply_all(S->m_fields_head, 426 FIELD_DICTIONARY_ORDER); 427 break; 428 429 case 'f': 430 S->m_field_options |= FIELD_FOLD_UPPERCASE; 431 field_apply_all(S->m_fields_head, 432 FIELD_FOLD_UPPERCASE); 433 break; 434 435 case 'i': 436 S->m_field_options |= 437 FIELD_IGNORE_NONPRINTABLES; 438 field_apply_all(S->m_fields_head, 439 FIELD_IGNORE_NONPRINTABLES); 440 break; 441 442 case 'M': 443 S->m_default_species = MONTH; 444 S->m_field_options &= 445 ~FIELD_IGNORE_BLANKS_START; 446 break; 447 448 case 'n': 449 S->m_default_species = NUMERIC; 450 { 451 field_t *f; 452 453 for (f = S->m_fields_head; f; 454 f = f->f_next) 455 if ((f->f_options & 456 FIELD_MODIFIERS_DEFINED) == 457 0) 458 f->f_species = NUMERIC; 459 } 460 break; 461 462 case 'b': 463 S->m_field_options |= 464 FIELD_IGNORE_BLANKS_START | 465 FIELD_IGNORE_BLANKS_END; 466 break; 467 468 case 'r': 469 S->m_field_options |= 470 FIELD_REVERSE_COMPARISONS; 471 field_apply_all(S->m_fields_head, 472 FIELD_REVERSE_COMPARISONS); 473 break; 474 475 case 't': 476 /* 477 * delimiter 478 */ 479 if (S->m_single_byte_locale) { 480 /* 481 * Most debuggers can't take tabs as 482 * input arguments, so we provide an 483 * escape sequence to allow testing of 484 * this special case for the DEBUG 485 * version. 486 */ 487 S->m_field_separator.sc = 488 #ifdef DEBUG 489 xstreql(optarg, "\\t") ? '\t' : 490 #endif 491 optarg[0]; 492 } else 493 (void) mbtowc(&S->m_field_separator.wc, 494 optarg, MB_CUR_MAX); 495 break; 496 497 case 'k': 498 /* 499 * key 500 */ 501 (void) parse_new_field_spec(S, optarg); 502 break; 503 504 case 'S': 505 S->m_memory_limit = strtomem(optarg); 506 #ifdef DEBUG 507 (void) fprintf(stderr, CMDNAME 508 ": limiting size to %d bytes\n", 509 S->m_memory_limit); 510 #endif /* DEBUG */ 511 break; 512 513 /* 514 * We never take a naked -999; these should always be 515 * associated with a preceding +000. 516 */ 517 case '0': 518 case '1': 519 case '2': 520 case '3': 521 case '4': 522 case '5': 523 case '6': 524 case '7': 525 case '8': 526 case '9': 527 usage(); 528 break; 529 case '?': 530 /* error case */ 531 usage(); 532 break; 533 } 534 535 /* 536 * Go back for next argument. 537 */ 538 continue; 539 } 540 541 /* 542 * There are three (interpretable) possibilities for getopt() to 543 * return EOF with arguments on the command line: we have seen 544 * the "end-of-options" token, --, we have encountered the 545 * old-style field definition, +NNN, or we have found a 546 * filename. 547 * 548 * In the second case, we must also search for the optional -NNN 549 * field terminal definition. (since "+joe", for instance, is 550 * a valid filename, we must handle this pattern as well.) This 551 * is performed by parse_old_field_spec(). 552 */ 553 if (xstreql(argv[optind - 1], "--")) { 554 /* 555 * Process all arguments following end-of-options token 556 * as filenames. 557 */ 558 while (optind < argc) { 559 if (xstreql(argv[optind], "-")) 560 S->m_input_from_stdin = 1; 561 else 562 stream_add_file_to_chain( 563 &(S->m_input_streams), 564 argv[optind]); 565 optind++; 566 } 567 568 break; 569 } 570 571 if (optind < argc) { 572 if (xstreql(argv[optind], "-")) { 573 S->m_input_from_stdin = 1; 574 optind++; 575 } else if (*(argv[optind]) != '+' || 576 !parse_old_field_spec(S, argc, argv)) { 577 /* 578 * It's a filename, because it either doesn't 579 * start with '+', or if it did, it wasn't an 580 * actual field specifier. 581 */ 582 stream_add_file_to_chain(&(S->m_input_streams), 583 argv[optind]); 584 optind++; 585 } 586 } 587 } 588 589 if (S->m_input_streams == NULL) 590 S->m_input_from_stdin = 1; 591 592 if (S->m_output_filename == NULL) 593 S->m_output_to_stdout = 1; 594 595 /* 596 * If no fields, then one great field. However, if the -b option was 597 * set globally, be sure to ignore it, as per UNIX98. 598 */ 599 if (S->m_fields_head == NULL) { 600 S->m_field_options &= ~FIELD_IGNORE_BLANKS_START; 601 602 (void) parse_new_field_spec(S, "1"); 603 /* 604 * "Entire line" fast path is only valid if no delimiter has 605 * been set and no modifiers have been applied. 606 */ 607 if (S->m_field_separator.wc == 0 && 608 S->m_default_species == ALPHA && 609 S->m_field_options == 0) 610 S->m_entire_line = 1; 611 } 612 613 return (0); 614 } 615