1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "utility.h" 30 31 #include "initialize.h" 32 #include "statistics.h" 33 #include "streams_common.h" 34 #include "streams.h" 35 36 /* 37 * utility 38 * 39 * Overview 40 * utility.c contains the general purpose routines used in various locations 41 * throughout sort. It provides a number of interfaces that maintain local 42 * state relevant to this instance of sort. We discuss the more significant 43 * of these interfaces below. 44 * 45 * Output guard 46 * sort is one of the few Unix utilities that is capable of working "in 47 * place"; that is, sort can manipulate an input file and place its output in 48 * a file of the same name safely. This is handled in this implementation by 49 * the output guard facility. In the case of an interrupt or other fatal 50 * signal, sort essays to restore the original input file. 51 * 52 * Temporary file cleanup 53 * Similar to the output guard facility, sort cleans up its temporary files in 54 * the case of interruption (or normal exit, for that matter); this is handled 55 * by registering a list of file pointers for later use by the atexit handler. 56 * 57 * Temporary filename security 58 * sort protects against "open-through-link" security attacks by verifying 59 * that the selected temporary file name is unused. If the file name is in 60 * use, the pattern is readjusted until an available name pattern is 61 * discovered. 62 * 63 * Buffered I/O 64 * sort has a simple buffered I/O facility of its own, to facilitate writing 65 * data in large quantities (particularly for multibyte locales). cxwrite() 66 * is the base routine, while wxwrite(), which handles multibyte buffers, is 67 * built on top of cxwrite(). 68 */ 69 70 #define XBUFFER_SIZE (32 * KILOBYTE) 71 72 #define EXIT_OK 0 73 #define EXIT_FAILURE 1 74 #define EXIT_ERROR 2 75 #define EXIT_INTERNAL 3 76 77 static int held_fd = -1; 78 79 static stream_t **cleanup_chain = NULL; 80 81 static char *output_guard_tempname = NULL; 82 static ssize_t output_guard_size = 0; 83 static char *output_guard_filename = NULL; 84 static int output_guard_copy_complete = 0; 85 86 static const char *default_tmpdir = "/var/tmp"; 87 static const char *default_template = "/stmAAAXXXXXX"; 88 static const char *default_template_count = ".00000000"; 89 static char *current_tmpdir; 90 static char *current_template; 91 92 static const char PNAME_FMT[] = "%s: "; 93 static const char ERRNO_FMT[] = ": %s\n"; 94 static const char *pname = "sort"; 95 96 void 97 swap(void **a, void **b) 98 { 99 void *t; 100 101 t = *a; 102 *a = *b; 103 *b = t; 104 105 __S(stats_incr_swaps()); 106 } 107 108 /* 109 * Temporary file name template handling. 110 */ 111 static void 112 reset_file_template() 113 { 114 struct stat s; 115 116 do { 117 (void) strcpy(current_template, current_tmpdir); 118 (void) strcat(current_template, default_template); 119 (void) mktemp(current_template); 120 (void) strcat(current_template, default_template_count); 121 } while (lstat(current_template, &s) != -1); 122 } 123 124 int 125 bump_file_template() 126 { 127 struct stat s; 128 int n = strlen(current_template); 129 int i; 130 131 for (i = n - 1; isdigit((uchar_t)current_template[i]); i--) { 132 current_template[i]++; 133 if (current_template[i] > '9') 134 current_template[i] = '0'; 135 else 136 break; 137 } 138 139 if (!isdigit((uchar_t)current_template[i])) { 140 /* 141 * Template has been exhausted, so reset. 142 */ 143 reset_file_template(); 144 } 145 146 if (lstat(current_template, &s) == 0) { 147 /* 148 * Our newly bumped template has been anticipated; reset to 149 * avoid possible "link-through" attack. 150 */ 151 reset_file_template(); 152 } 153 154 return (0); 155 } 156 157 void 158 set_file_template(char **T) 159 { 160 struct stat s; 161 int check_tmpdir = 0; 162 163 if (*T != NULL) { 164 current_tmpdir = strdup(*T); 165 check_tmpdir = 1; 166 } else if ((current_tmpdir = getenv("TMPDIR")) != NULL) { 167 check_tmpdir = 1; 168 } else { 169 current_tmpdir = (char *)default_tmpdir; 170 } 171 172 /* 173 * Check that the temporary directory given exists, and is a directory. 174 */ 175 if (check_tmpdir) { 176 if (stat(current_tmpdir, &s) != 0) { 177 warn(gettext("cannot stat temporary directory %s"), 178 current_tmpdir); 179 180 current_tmpdir = (char *)default_tmpdir; 181 } else if (!S_ISDIR(s.st_mode)) { 182 warn(gettext("%s is not a directory; " 183 "using default temporary directory"), 184 current_tmpdir); 185 186 current_tmpdir = (char *)default_tmpdir; 187 } 188 } 189 190 ASSERT(current_tmpdir != NULL); 191 192 current_template = safe_realloc(NULL, strlen(current_tmpdir) 193 + strlen(default_template) + strlen(default_template_count) + 1); 194 195 reset_file_template(); 196 } 197 198 char * 199 get_file_template() 200 { 201 return (current_template); 202 } 203 204 /* 205 * Output guard routines. 206 */ 207 void 208 establish_output_guard(sort_t *S) 209 { 210 struct stat output_stat; 211 212 if (S->m_output_to_stdout) 213 return; 214 215 if (stat(S->m_output_filename, &output_stat) == 0) { 216 stream_t *strp = S->m_input_streams; 217 218 while (strp != NULL) { 219 /* 220 * We needn't protect an empty file. 221 */ 222 if (!(strp->s_status & STREAM_NOTFILE) && 223 strp->s_dev == output_stat.st_dev && 224 strp->s_ino == output_stat.st_ino && 225 strp->s_filesize > 0) { 226 output_guard_filename = S->m_output_filename; 227 output_guard_size = strp->s_filesize; 228 229 ASSERT(output_guard_filename != NULL); 230 231 if (bump_file_template() < 0) 232 die(EMSG_TEMPORARY); 233 234 if ((strp->s_filename = output_guard_tempname = 235 strdup(get_file_template())) == NULL) 236 die(EMSG_ALLOC); 237 238 xcp(output_guard_tempname, 239 output_guard_filename, output_guard_size); 240 241 output_guard_copy_complete = 1; 242 243 return; 244 } 245 strp = strp->s_next; 246 } 247 } 248 } 249 250 void 251 remove_output_guard() 252 { 253 if (output_guard_tempname && unlink(output_guard_tempname) == -1) 254 warn(gettext("unable to unlink %s"), output_guard_tempname); 255 256 output_guard_tempname = NULL; 257 } 258 259 void 260 set_cleanup_chain(stream_t **strp) 261 { 262 ASSERT(strp != NULL); 263 264 cleanup_chain = strp; 265 } 266 267 /* 268 * atexit_handler() cleans up any temporary files outstanding after a fatal 269 * signal, a call to die() or at exit(). To preserve the input file under low 270 * storage conditions (and both the output file and the temporary files are 271 * directed at the same filesystem), we remove all temporary files but the 272 * output guard first, and then restore the original file. Of course, this is 273 * not foolproof, as another writer may have exhausted storage. 274 */ 275 void 276 atexit_handler() 277 { 278 stream_t *strp; 279 280 if (cleanup_chain && *cleanup_chain) 281 for (strp = *cleanup_chain; strp != NULL; strp = strp->s_next) 282 stream_unlink_temporary(strp); 283 284 if (output_guard_tempname) { 285 if (output_guard_copy_complete) 286 xcp(output_guard_filename, output_guard_tempname, 287 output_guard_size); 288 289 remove_output_guard(); 290 } 291 292 __S(stats_display()); 293 } 294 295 size_t 296 strtomem(char *S) 297 { 298 const char *format_str = "%lf%c"; 299 double val = 0.0; 300 size_t retval; 301 char units = 'k'; 302 size_t phys_total = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); 303 304 if (sscanf(S, format_str, &val, &units) < 1 || val < 0) 305 return (0); 306 307 if (units == '%') { 308 if (val < 0 || val > 100) 309 return (0); 310 val *= phys_total / 100; 311 } else 312 switch (units) { 313 case 't' : /* terabytes */ 314 case 'T' : 315 val *= 1024; 316 /*FALLTHROUGH*/ 317 case 'g' : /* gigabytes */ 318 case 'G' : 319 val *= 1024; 320 /*FALLTHROUGH*/ 321 case 'm' : /* megabytes */ 322 case 'M' : 323 val *= 1024; 324 /*FALLTHROUGH*/ 325 case 'k' : /* kilobytes */ 326 case 'K' : 327 val *= 1024; 328 /*FALLTHROUGH*/ 329 case 'b' : /* bytes */ 330 case 'B' : 331 break; 332 default : 333 /* 334 * default is kilobytes 335 */ 336 val *= 1024; 337 break; 338 } 339 340 if (val > SIZE_MAX) 341 return (0); 342 343 retval = (size_t)val; 344 345 return (retval); 346 } 347 348 size_t 349 available_memory(size_t mem_limit) 350 { 351 size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE); 352 size_t avail; 353 354 if (mem_limit != 0) { 355 #ifdef DEBUG 356 /* 357 * In the debug case, we want to test the temporary files 358 * handling, so no lower bound on the memory limit is imposed. 359 */ 360 avail = mem_limit; 361 #else 362 avail = MAX(64 * KILOBYTE, mem_limit); 363 #endif /* DEBUG */ 364 } else { 365 avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail / 366 AV_MEM_DIVISOR, 16 * MEGABYTE)); 367 } 368 369 __S(stats_set_available_memory(avail)); 370 371 return (avail); 372 } 373 374 void 375 set_memory_ratio(sort_t *S, int *numerator, int *denominator) 376 { 377 if (S->m_c_locale) { 378 *numerator = CHAR_AVG_LINE; 379 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 380 CHAR_AVG_LINE + CHAR_AVG_LINE; 381 return; 382 } 383 384 if (S->m_single_byte_locale) { 385 *numerator = CHAR_AVG_LINE; 386 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 387 CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE; 388 return; 389 } 390 391 *numerator = WCHAR_AVG_LINE; 392 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 393 WCHAR_AVG_LINE + WCHAR_AVG_LINE; 394 } 395 396 void * 397 safe_realloc(void *ptr, size_t sz) 398 { 399 /* 400 * safe_realloc() is not meant as an alternative free() mechanism--we 401 * disallow reallocations to size zero. 402 */ 403 ASSERT(sz != 0); 404 405 if ((ptr = realloc(ptr, sz)) != NULL) 406 return (ptr); 407 408 die(gettext("unable to reallocate buffer")); 409 /*NOTREACHED*/ 410 return (NULL); /* keep gcc happy */ 411 } 412 413 void 414 safe_free(void *ptr) 415 { 416 if (ptr) 417 free(ptr); 418 } 419 420 void * 421 xzmap(void *addr, size_t len, int prot, int flags, off_t off) 422 { 423 void *pa; 424 425 pa = mmap(addr, len, prot, flags | MAP_ANON, -1, off); 426 if (pa == MAP_FAILED) 427 die(gettext("can't mmap anonymous memory")); 428 429 return (pa); 430 } 431 432 void 433 usage() 434 { 435 (void) fprintf(stderr, 436 gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]" 437 " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]" 438 " [+pos1 [-pos2]] files...\n"), CMDNAME); 439 exit(E_USAGE); 440 } 441 442 /* 443 * hold_file_descriptor() and release_file_descriptor() reserve a single file 444 * descriptor entry for later use. We issue the hold prior to any loop that has 445 * an exit condition based on the receipt of EMFILE from an open() call; once we 446 * have exited, we can release, typically prior to opening a file for output. 447 */ 448 void 449 hold_file_descriptor() 450 { 451 ASSERT(held_fd == -1); 452 453 if ((held_fd = open("/dev/null", O_RDONLY)) == -1) 454 die(gettext("insufficient available file descriptors\n")); 455 } 456 457 void 458 release_file_descriptor() 459 { 460 ASSERT(held_fd != -1); 461 462 (void) close(held_fd); 463 held_fd = -1; 464 } 465 466 void 467 copy_line_rec(const line_rec_t *a, line_rec_t *b) 468 { 469 (void) memcpy(b, a, sizeof (line_rec_t)); 470 } 471 472 void 473 trip_eof(FILE *f) 474 { 475 if (feof(f)) 476 return; 477 478 (void) ungetc(fgetc(f), f); 479 } 480 481 /* 482 * int cxwrite(int, char *, size_t) 483 * 484 * Overview 485 * cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on 486 * file descriptors. It returns -1 in the case that the write() fails to 487 * write the current buffer contents. cxwrite() must be flushed before being 488 * applied to a new file descriptor. 489 * 490 * Return values 491 * 0 on success, -1 on error. 492 */ 493 int 494 cxwrite(int fd, char *ptr, size_t nbytes) 495 { 496 static char buffer[XBUFFER_SIZE]; 497 static size_t offset = 0; 498 size_t mbytes; 499 500 if (ptr == NULL) { 501 errno = 0; 502 while (offset -= write(fd, buffer, offset)) { 503 if (errno) 504 break; 505 } 506 507 if (offset) 508 return (-1); 509 510 return (0); 511 } 512 513 while (nbytes != 0) { 514 if (offset + nbytes > XBUFFER_SIZE) 515 mbytes = XBUFFER_SIZE - offset; 516 else 517 mbytes = nbytes; 518 519 (void) memcpy(buffer + offset, ptr, mbytes); 520 nbytes -= mbytes; 521 offset += mbytes; 522 ptr += mbytes; 523 524 if (nbytes) { 525 errno = 0; 526 while (offset -= write(fd, buffer, offset)) { 527 if (errno) 528 break; 529 } 530 531 if (offset) 532 return (-1); 533 } 534 } 535 536 return (0); 537 } 538 539 /* 540 * int wxwrite(int, wchar_t *) 541 * 542 * Overview 543 * wxwrite() implements a buffered write() function for null-terminated wide 544 * character buffers with similar calling semantics to cxwrite(). It returns 545 * -1 in the case that it fails to write the current buffer contents. 546 * wxwrite() must be flushed before being applied to a new file descriptor. 547 * 548 * Return values 549 * 0 on success, -1 on error. 550 */ 551 int 552 wxwrite(int fd, wchar_t *ptr) 553 { 554 static char *convert_buffer; 555 static size_t convert_bufsize = 1024; 556 size_t req_bufsize; 557 558 if (ptr == NULL) 559 return (cxwrite(NULL, 0, 1)); 560 561 if (convert_buffer == NULL) 562 convert_buffer = safe_realloc(NULL, convert_bufsize); 563 /* 564 * We use wcstombs(NULL, ., .) to verify that we have an adequate 565 * buffer size for the conversion. Since this buffer was converted into 566 * wide character format earlier, we can safely assume that the buffer 567 * can be converted back to the external multibyte form. 568 */ 569 req_bufsize = wcstombs(NULL, ptr, convert_bufsize); 570 if (req_bufsize > convert_bufsize) { 571 convert_bufsize = req_bufsize + 1; 572 convert_buffer = safe_realloc(convert_buffer, convert_bufsize); 573 } 574 575 (void) wcstombs(convert_buffer, ptr, convert_bufsize); 576 577 return (cxwrite(fd, convert_buffer, req_bufsize)); 578 } 579 580 int 581 xstreql(const char *a, const char *b) 582 { 583 return (strcmp(a, b) == 0); 584 } 585 586 int 587 xstrneql(const char *a, const char *b, const size_t l) 588 { 589 return (strncmp(a, b, l) == 0); 590 } 591 592 char * 593 xstrnchr(const char *S, const int c, const size_t n) 594 { 595 const char *eS = S + n; 596 597 do { 598 if (*S == (char)c) 599 return ((char *)S); 600 } while (++S < eS); 601 602 return (NULL); 603 } 604 605 void 606 xstrninv(char *s, ssize_t start, ssize_t length) 607 { 608 ssize_t i; 609 610 for (i = start; i < start + length; i++) 611 s[i] = UCHAR_MAX - s[i]; 612 } 613 614 int 615 xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length) 616 { 617 return (wcsncmp(a, b, length) == 0); 618 } 619 620 wchar_t * 621 xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n) 622 { 623 const wchar_t *ews = ws + n; 624 625 do { 626 if (*ws == (wchar_t)wc) 627 return ((wchar_t *)ws); 628 } while (++ws < ews); 629 630 return (NULL); 631 } 632 633 void 634 xwcsninv(wchar_t *s, ssize_t start, ssize_t length) 635 { 636 ssize_t i; 637 638 for (i = start; i < start + length; i++) 639 s[i] = WCHAR_MAX - s[i]; 640 } 641 642 #ifdef _LITTLE_ENDIAN 643 void 644 xwcsntomsb(wchar_t *s, ssize_t length) 645 { 646 ssize_t i; 647 648 ASSERT(sizeof (wchar_t) == sizeof (uint32_t)); 649 650 for (i = 0; i < length; i++, s++) { 651 char *t = (char *)s; 652 char u; 653 654 u = *t; 655 *t = *(t + 3); 656 *(t + 3) = u; 657 658 u = *(t + 1); 659 *(t + 1) = *(t + 2); 660 *(t + 2) = u; 661 } 662 } 663 #endif /* _LITTLE_ENDIAN */ 664 665 wchar_t * 666 xmemwchar(wchar_t *s, wchar_t w, ssize_t length) 667 { 668 ssize_t i = length; 669 670 while (--i > 0) { 671 if (*s == w) 672 return (s); 673 s++; 674 } 675 676 return (NULL); 677 } 678 679 void 680 xcp(char *dst, char *src, off_t size) 681 { 682 int fd_in, fd_out; 683 void *mm_in; 684 size_t chunksize = 2 * MEGABYTE; 685 int i; 686 ssize_t nchunks = size / chunksize; 687 ssize_t lastchunk = size % chunksize; 688 689 if (dst == NULL || src == NULL) 690 return; 691 692 if ((fd_in = open(src, O_RDONLY)) < 0) 693 die(EMSG_OPEN, src); 694 if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0) 695 die(EMSG_OPEN, dst); 696 697 for (i = 0; i < nchunks; i++) { 698 if ((mm_in = mmap(0, chunksize, PROT_READ, MAP_SHARED, fd_in, 699 i * chunksize)) == MAP_FAILED) 700 die(EMSG_MMAP, src); 701 702 if (write(fd_out, mm_in, chunksize) != chunksize) 703 die(EMSG_WRITE, dst); 704 705 (void) munmap(mm_in, chunksize); 706 } 707 708 if (lastchunk) { 709 if ((mm_in = mmap(0, lastchunk, PROT_READ, MAP_SHARED, fd_in, 710 nchunks * chunksize)) == MAP_FAILED) 711 die(EMSG_MMAP, src); 712 713 if (write(fd_out, mm_in, lastchunk) != lastchunk) 714 die(EMSG_WRITE, dst); 715 716 (void) munmap(mm_in, lastchunk); 717 } 718 719 (void) close(fd_in); 720 721 if (close(fd_out) == -1) 722 die(EMSG_CLOSE, dst); 723 } 724 725 /*PRINTFLIKE1*/ 726 void 727 warn(const char *format, ...) 728 { 729 int err = errno; 730 va_list alist; 731 732 if (pname != NULL) 733 (void) fprintf(stderr, gettext(PNAME_FMT), pname); 734 735 va_start(alist, format); 736 (void) vfprintf(stderr, format, alist); 737 va_end(alist); 738 739 if (strrchr(format, '\n') == NULL) 740 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err)); 741 } 742 743 /*PRINTFLIKE1*/ 744 void 745 die(const char *format, ...) 746 { 747 int err = errno; 748 va_list alist; 749 750 if (pname != NULL) 751 (void) fprintf(stderr, gettext(PNAME_FMT), pname); 752 753 va_start(alist, format); 754 (void) vfprintf(stderr, format, alist); 755 va_end(alist); 756 757 if (strrchr(format, '\n') == NULL) 758 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err)); 759 760 exit(E_ERROR); 761 } 762 763 #ifdef DEBUG 764 /* 765 * pprintc() is called only by xdump(). 766 */ 767 #define BYTES_PER_LINE 16 768 static void 769 pprintc(FILE *fp, char c) 770 { 771 if (isspace((uchar_t)c)) 772 (void) fprintf(fp, " "); 773 else if (isprint((uchar_t)c)) 774 (void) fprintf(fp, "%c", c); 775 else 776 (void) fprintf(fp, "."); 777 } 778 779 static void 780 pprintwc(FILE *fp, wchar_t c) 781 { 782 if (iswspace(c)) 783 (void) fprintf(fp, " "); 784 else if (iswprint(c)) 785 (void) fprintf(fp, "%wc", c); 786 else 787 (void) fprintf(fp, "."); 788 } 789 790 /* 791 * xdump() is used only for debugging purposes. 792 */ 793 void 794 xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide) 795 { 796 int i; 797 size_t nc = 0; 798 uchar_t d[BYTES_PER_LINE]; 799 800 for (; nc < bufsize; buf++) { 801 d[nc % BYTES_PER_LINE] = *buf; 802 if (nc % BYTES_PER_LINE == 0) { 803 (void) fprintf(fp, "%08x:", nc); 804 } 805 (void) fprintf(fp, " %02x", *buf); 806 nc++; 807 if (nc % BYTES_PER_LINE == 0) { 808 (void) fprintf(fp, " "); 809 if (wide) { 810 for (i = 0; i < BYTES_PER_LINE; 811 i += sizeof (wchar_t)) 812 pprintwc(fp, *(wchar_t *)(d + i)); 813 } else { 814 for (i = 0; i < BYTES_PER_LINE; i++) 815 pprintc(fp, d[i]); 816 } 817 (void) fprintf(fp, "\n"); 818 } 819 } 820 821 for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++) 822 (void) fprintf(fp, " "); 823 824 (void) fprintf(fp, " "); 825 826 if (wide) { 827 for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t)) 828 pprintwc(fp, *(wchar_t *)(d + i)); 829 } else { 830 for (i = 0; i < nc % BYTES_PER_LINE; i++) 831 pprintc(fp, d[i]); 832 } 833 834 (void) fprintf(fp, "\n"); 835 } 836 #endif /* DEBUG */ 837