1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "utility.h" 28 29 #include "initialize.h" 30 #include "statistics.h" 31 #include "streams_common.h" 32 #include "streams.h" 33 34 /* 35 * utility 36 * 37 * Overview 38 * utility.c contains the general purpose routines used in various locations 39 * throughout sort. It provides a number of interfaces that maintain local 40 * state relevant to this instance of sort. We discuss the more significant 41 * of these interfaces below. 42 * 43 * Output guard 44 * sort is one of the few Unix utilities that is capable of working "in 45 * place"; that is, sort can manipulate an input file and place its output in 46 * a file of the same name safely. This is handled in this implementation by 47 * the output guard facility. In the case of an interrupt or other fatal 48 * signal, sort essays to restore the original input file. 49 * 50 * Temporary file cleanup 51 * Similar to the output guard facility, sort cleans up its temporary files in 52 * the case of interruption (or normal exit, for that matter); this is handled 53 * by registering a list of file pointers for later use by the atexit handler. 54 * 55 * Temporary filename security 56 * sort protects against "open-through-link" security attacks by verifying 57 * that the selected temporary file name is unused. If the file name is in 58 * use, the pattern is readjusted until an available name pattern is 59 * discovered. 60 * 61 * Buffered I/O 62 * sort has a simple buffered I/O facility of its own, to facilitate writing 63 * data in large quantities (particularly for multibyte locales). cxwrite() 64 * is the base routine, while wxwrite(), which handles multibyte buffers, is 65 * built on top of cxwrite(). 66 */ 67 68 #define XBUFFER_SIZE (32 * KILOBYTE) 69 70 #define EXIT_OK 0 71 #define EXIT_FAILURE 1 72 #define EXIT_ERROR 2 73 #define EXIT_INTERNAL 3 74 75 static int held_fd = -1; 76 77 static stream_t **cleanup_chain = NULL; 78 79 static char *output_guard_tempname = NULL; 80 static ssize_t output_guard_size = 0; 81 static char *output_guard_filename = NULL; 82 static int output_guard_copy_complete = 0; 83 84 static const char *default_tmpdir = "/var/tmp"; 85 static const char *default_template = "/stmAAAXXXXXX"; 86 static const char *default_template_count = ".00000000"; 87 static char *current_tmpdir; 88 static char *current_template; 89 90 static const char PNAME_FMT[] = "%s: "; 91 static const char ERRNO_FMT[] = ": %s\n"; 92 static const char *pname = "sort"; 93 94 void 95 swap(void **a, void **b) 96 { 97 void *t; 98 99 t = *a; 100 *a = *b; 101 *b = t; 102 103 __S(stats_incr_swaps()); 104 } 105 106 /* 107 * Temporary file name template handling. 108 */ 109 static void 110 reset_file_template() 111 { 112 struct stat s; 113 114 do { 115 (void) strcpy(current_template, current_tmpdir); 116 (void) strcat(current_template, default_template); 117 (void) mktemp(current_template); 118 (void) strcat(current_template, default_template_count); 119 } while (lstat(current_template, &s) != -1); 120 } 121 122 int 123 bump_file_template() 124 { 125 struct stat s; 126 int n = strlen(current_template); 127 int i; 128 129 for (i = n - 1; isdigit((uchar_t)current_template[i]); i--) { 130 current_template[i]++; 131 if (current_template[i] > '9') 132 current_template[i] = '0'; 133 else 134 break; 135 } 136 137 if (!isdigit((uchar_t)current_template[i])) { 138 /* 139 * Template has been exhausted, so reset. 140 */ 141 reset_file_template(); 142 } 143 144 if (lstat(current_template, &s) == 0) { 145 /* 146 * Our newly bumped template has been anticipated; reset to 147 * avoid possible "link-through" attack. 148 */ 149 reset_file_template(); 150 } 151 152 return (0); 153 } 154 155 void 156 set_file_template(char **T) 157 { 158 struct stat s; 159 int check_tmpdir = 0; 160 161 if (*T != NULL) { 162 current_tmpdir = strdup(*T); 163 check_tmpdir = 1; 164 } else if ((current_tmpdir = getenv("TMPDIR")) != NULL) { 165 check_tmpdir = 1; 166 } else { 167 current_tmpdir = (char *)default_tmpdir; 168 } 169 170 /* 171 * Check that the temporary directory given exists, and is a directory. 172 */ 173 if (check_tmpdir) { 174 if (stat(current_tmpdir, &s) != 0) { 175 warn(gettext("cannot stat temporary directory %s"), 176 current_tmpdir); 177 178 current_tmpdir = (char *)default_tmpdir; 179 } else if (!S_ISDIR(s.st_mode)) { 180 warn(gettext("%s is not a directory; " 181 "using default temporary directory"), 182 current_tmpdir); 183 184 current_tmpdir = (char *)default_tmpdir; 185 } 186 } 187 188 ASSERT(current_tmpdir != NULL); 189 190 current_template = safe_realloc(NULL, strlen(current_tmpdir) 191 + strlen(default_template) + strlen(default_template_count) + 1); 192 193 reset_file_template(); 194 } 195 196 char * 197 get_file_template() 198 { 199 return (current_template); 200 } 201 202 /* 203 * Output guard routines. 204 */ 205 void 206 establish_output_guard(sort_t *S) 207 { 208 struct stat output_stat; 209 210 if (S->m_output_to_stdout) 211 return; 212 213 if (stat(S->m_output_filename, &output_stat) == 0) { 214 stream_t *strp = S->m_input_streams; 215 216 while (strp != NULL) { 217 /* 218 * We needn't protect an empty file. 219 */ 220 if (!(strp->s_status & STREAM_NOTFILE) && 221 strp->s_dev == output_stat.st_dev && 222 strp->s_ino == output_stat.st_ino && 223 strp->s_filesize > 0) { 224 output_guard_filename = S->m_output_filename; 225 output_guard_size = strp->s_filesize; 226 227 ASSERT(output_guard_filename != NULL); 228 229 if (bump_file_template() < 0) 230 die(EMSG_TEMPORARY); 231 232 if ((strp->s_filename = output_guard_tempname = 233 strdup(get_file_template())) == NULL) 234 die(EMSG_ALLOC); 235 236 xcp(output_guard_tempname, 237 output_guard_filename, output_guard_size); 238 239 output_guard_copy_complete = 1; 240 241 return; 242 } 243 strp = strp->s_next; 244 } 245 } 246 } 247 248 void 249 remove_output_guard() 250 { 251 if (output_guard_tempname && unlink(output_guard_tempname) == -1) 252 warn(gettext("unable to unlink %s"), output_guard_tempname); 253 254 output_guard_tempname = NULL; 255 } 256 257 void 258 set_cleanup_chain(stream_t **strp) 259 { 260 ASSERT(strp != NULL); 261 262 cleanup_chain = strp; 263 } 264 265 /* 266 * atexit_handler() cleans up any temporary files outstanding after a fatal 267 * signal, a call to die() or at exit(). To preserve the input file under low 268 * storage conditions (and both the output file and the temporary files are 269 * directed at the same filesystem), we remove all temporary files but the 270 * output guard first, and then restore the original file. Of course, this is 271 * not foolproof, as another writer may have exhausted storage. 272 */ 273 void 274 atexit_handler() 275 { 276 stream_t *strp; 277 278 if (cleanup_chain && *cleanup_chain) 279 for (strp = *cleanup_chain; strp != NULL; strp = strp->s_next) 280 stream_unlink_temporary(strp); 281 282 if (output_guard_tempname) { 283 if (output_guard_copy_complete) 284 xcp(output_guard_filename, output_guard_tempname, 285 output_guard_size); 286 287 remove_output_guard(); 288 } 289 290 __S(stats_display()); 291 } 292 293 size_t 294 strtomem(char *S) 295 { 296 const char *format_str = "%lf%c"; 297 double val = 0.0; 298 size_t retval; 299 char units = 'k'; 300 size_t phys_total = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); 301 302 if (sscanf(S, format_str, &val, &units) < 1 || val < 0) 303 return (0); 304 305 if (units == '%') { 306 if (val < 0 || val > 100) 307 return (0); 308 val *= phys_total / 100; 309 } else 310 switch (units) { 311 case 't' : /* terabytes */ 312 case 'T' : 313 val *= 1024; 314 /*FALLTHROUGH*/ 315 case 'g' : /* gigabytes */ 316 case 'G' : 317 val *= 1024; 318 /*FALLTHROUGH*/ 319 case 'm' : /* megabytes */ 320 case 'M' : 321 val *= 1024; 322 /*FALLTHROUGH*/ 323 case 'k' : /* kilobytes */ 324 case 'K' : 325 val *= 1024; 326 /*FALLTHROUGH*/ 327 case 'b' : /* bytes */ 328 case 'B' : 329 break; 330 default : 331 /* 332 * default is kilobytes 333 */ 334 val *= 1024; 335 break; 336 } 337 338 if (val > SIZE_MAX) 339 return (0); 340 341 retval = (size_t)val; 342 343 return (retval); 344 } 345 346 size_t 347 available_memory(size_t mem_limit) 348 { 349 size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE); 350 size_t avail; 351 352 if (mem_limit != 0) { 353 #ifdef DEBUG 354 /* 355 * In the debug case, we want to test the temporary files 356 * handling, so no lower bound on the memory limit is imposed. 357 */ 358 avail = mem_limit; 359 #else 360 avail = MAX(64 * KILOBYTE, mem_limit); 361 #endif /* DEBUG */ 362 } else { 363 avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail / 364 AV_MEM_DIVISOR, 16 * MEGABYTE)); 365 } 366 367 __S(stats_set_available_memory(avail)); 368 369 return (avail); 370 } 371 372 void 373 set_memory_ratio(sort_t *S, int *numerator, int *denominator) 374 { 375 if (S->m_c_locale) { 376 *numerator = CHAR_AVG_LINE; 377 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 378 CHAR_AVG_LINE + CHAR_AVG_LINE; 379 return; 380 } 381 382 if (S->m_single_byte_locale) { 383 *numerator = CHAR_AVG_LINE; 384 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 385 CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE; 386 return; 387 } 388 389 *numerator = WCHAR_AVG_LINE; 390 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) + 391 WCHAR_AVG_LINE + WCHAR_AVG_LINE; 392 } 393 394 void * 395 safe_realloc(void *ptr, size_t sz) 396 { 397 /* 398 * safe_realloc() is not meant as an alternative free() mechanism--we 399 * disallow reallocations to size zero. 400 */ 401 ASSERT(sz != 0); 402 403 if ((ptr = realloc(ptr, sz)) != NULL) 404 return (ptr); 405 406 die(gettext("unable to reallocate buffer")); 407 /*NOTREACHED*/ 408 return (NULL); /* keep gcc happy */ 409 } 410 411 void 412 safe_free(void *ptr) 413 { 414 if (ptr) 415 free(ptr); 416 } 417 418 void * 419 xzmap(void *addr, size_t len, int prot, int flags, off_t off) 420 { 421 void *pa; 422 423 pa = mmap(addr, len, prot, flags | MAP_ANON, -1, off); 424 if (pa == MAP_FAILED) 425 die(gettext("can't mmap anonymous memory")); 426 427 return (pa); 428 } 429 430 void 431 usage() 432 { 433 (void) fprintf(stderr, 434 gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]" 435 " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]" 436 " [+pos1 [-pos2]] files...\n"), CMDNAME); 437 exit(E_USAGE); 438 } 439 440 /* 441 * hold_file_descriptor() and release_file_descriptor() reserve a single file 442 * descriptor entry for later use. We issue the hold prior to any loop that has 443 * an exit condition based on the receipt of EMFILE from an open() call; once we 444 * have exited, we can release, typically prior to opening a file for output. 445 */ 446 void 447 hold_file_descriptor() 448 { 449 ASSERT(held_fd == -1); 450 451 if ((held_fd = open("/dev/null", O_RDONLY)) == -1) 452 die(gettext("insufficient available file descriptors\n")); 453 } 454 455 void 456 release_file_descriptor() 457 { 458 ASSERT(held_fd != -1); 459 460 (void) close(held_fd); 461 held_fd = -1; 462 } 463 464 void 465 copy_line_rec(const line_rec_t *a, line_rec_t *b) 466 { 467 (void) memcpy(b, a, sizeof (line_rec_t)); 468 } 469 470 void 471 trip_eof(FILE *f) 472 { 473 if (feof(f)) 474 return; 475 476 (void) ungetc(fgetc(f), f); 477 } 478 479 /* 480 * int cxwrite(int, char *, size_t) 481 * 482 * Overview 483 * cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on 484 * file descriptors. It returns -1 in the case that the write() fails to 485 * write the current buffer contents. cxwrite() must be flushed before being 486 * applied to a new file descriptor. 487 * 488 * Return values 489 * 0 on success, -1 on error. 490 */ 491 int 492 cxwrite(int fd, char *ptr, size_t nbytes) 493 { 494 static char buffer[XBUFFER_SIZE]; 495 static size_t offset = 0; 496 size_t mbytes; 497 498 if (ptr == NULL) { 499 errno = 0; 500 while (offset -= write(fd, buffer, offset)) { 501 if (errno) 502 break; 503 } 504 505 if (offset) 506 return (-1); 507 508 return (0); 509 } 510 511 while (nbytes != 0) { 512 if (offset + nbytes > XBUFFER_SIZE) 513 mbytes = XBUFFER_SIZE - offset; 514 else 515 mbytes = nbytes; 516 517 (void) memcpy(buffer + offset, ptr, mbytes); 518 nbytes -= mbytes; 519 offset += mbytes; 520 ptr += mbytes; 521 522 if (nbytes) { 523 errno = 0; 524 while (offset -= write(fd, buffer, offset)) { 525 if (errno) 526 break; 527 } 528 529 if (offset) 530 return (-1); 531 } 532 } 533 534 return (0); 535 } 536 537 /* 538 * int wxwrite(int, wchar_t *) 539 * 540 * Overview 541 * wxwrite() implements a buffered write() function for null-terminated wide 542 * character buffers with similar calling semantics to cxwrite(). It returns 543 * -1 in the case that it fails to write the current buffer contents. 544 * wxwrite() must be flushed before being applied to a new file descriptor. 545 * 546 * Return values 547 * 0 on success, -1 on error. 548 */ 549 int 550 wxwrite(int fd, wchar_t *ptr) 551 { 552 static char *convert_buffer; 553 static size_t convert_bufsize = 1024; 554 size_t req_bufsize; 555 556 if (ptr == NULL) 557 return (cxwrite(fd, NULL, 0)); 558 559 if (convert_buffer == NULL) 560 convert_buffer = safe_realloc(NULL, convert_bufsize); 561 /* 562 * We use wcstombs(NULL, ., .) to verify that we have an adequate 563 * buffer size for the conversion. Since this buffer was converted into 564 * wide character format earlier, we can safely assume that the buffer 565 * can be converted back to the external multibyte form. 566 */ 567 req_bufsize = wcstombs(NULL, ptr, convert_bufsize); 568 if (req_bufsize > convert_bufsize) { 569 convert_bufsize = req_bufsize + 1; 570 convert_buffer = safe_realloc(convert_buffer, convert_bufsize); 571 } 572 573 (void) wcstombs(convert_buffer, ptr, convert_bufsize); 574 575 return (cxwrite(fd, convert_buffer, req_bufsize)); 576 } 577 578 int 579 xstreql(const char *a, const char *b) 580 { 581 return (strcmp(a, b) == 0); 582 } 583 584 int 585 xstrneql(const char *a, const char *b, const size_t l) 586 { 587 return (strncmp(a, b, l) == 0); 588 } 589 590 char * 591 xstrnchr(const char *S, const int c, const size_t n) 592 { 593 const char *eS = S + n; 594 595 do { 596 if (*S == (char)c) 597 return ((char *)S); 598 } while (++S < eS); 599 600 return (NULL); 601 } 602 603 void 604 xstrninv(char *s, ssize_t start, ssize_t length) 605 { 606 ssize_t i; 607 608 for (i = start; i < start + length; i++) 609 s[i] = UCHAR_MAX - s[i]; 610 } 611 612 int 613 xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length) 614 { 615 return (wcsncmp(a, b, length) == 0); 616 } 617 618 wchar_t * 619 xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n) 620 { 621 const wchar_t *ews = ws + n; 622 623 do { 624 if (*ws == (wchar_t)wc) 625 return ((wchar_t *)ws); 626 } while (++ws < ews); 627 628 return (NULL); 629 } 630 631 void 632 xwcsninv(wchar_t *s, ssize_t start, ssize_t length) 633 { 634 ssize_t i; 635 636 for (i = start; i < start + length; i++) 637 s[i] = WCHAR_MAX - s[i]; 638 } 639 640 #ifdef _LITTLE_ENDIAN 641 void 642 xwcsntomsb(wchar_t *s, ssize_t length) 643 { 644 ssize_t i; 645 646 ASSERT(sizeof (wchar_t) == sizeof (uint32_t)); 647 648 for (i = 0; i < length; i++, s++) { 649 char *t = (char *)s; 650 char u; 651 652 u = *t; 653 *t = *(t + 3); 654 *(t + 3) = u; 655 656 u = *(t + 1); 657 *(t + 1) = *(t + 2); 658 *(t + 2) = u; 659 } 660 } 661 #endif /* _LITTLE_ENDIAN */ 662 663 wchar_t * 664 xmemwchar(wchar_t *s, wchar_t w, ssize_t length) 665 { 666 ssize_t i = length; 667 668 while (--i > 0) { 669 if (*s == w) 670 return (s); 671 s++; 672 } 673 674 return (NULL); 675 } 676 677 void 678 xcp(char *dst, char *src, off_t size) 679 { 680 int fd_in, fd_out; 681 void *mm_in; 682 size_t chunksize = 2 * MEGABYTE; 683 int i; 684 ssize_t nchunks = size / chunksize; 685 ssize_t lastchunk = size % chunksize; 686 687 if (dst == NULL || src == NULL) 688 return; 689 690 if ((fd_in = open(src, O_RDONLY)) < 0) 691 die(EMSG_OPEN, src); 692 if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0) 693 die(EMSG_OPEN, dst); 694 695 for (i = 0; i < nchunks; i++) { 696 if ((mm_in = mmap(0, chunksize, PROT_READ, MAP_SHARED, fd_in, 697 i * chunksize)) == MAP_FAILED) 698 die(EMSG_MMAP, src); 699 700 if (write(fd_out, mm_in, chunksize) != chunksize) 701 die(EMSG_WRITE, dst); 702 703 (void) munmap(mm_in, chunksize); 704 } 705 706 if (lastchunk) { 707 if ((mm_in = mmap(0, lastchunk, PROT_READ, MAP_SHARED, fd_in, 708 nchunks * chunksize)) == MAP_FAILED) 709 die(EMSG_MMAP, src); 710 711 if (write(fd_out, mm_in, lastchunk) != lastchunk) 712 die(EMSG_WRITE, dst); 713 714 (void) munmap(mm_in, lastchunk); 715 } 716 717 (void) close(fd_in); 718 719 if (close(fd_out) == -1) 720 die(EMSG_CLOSE, dst); 721 } 722 723 /*PRINTFLIKE1*/ 724 void 725 warn(const char *format, ...) 726 { 727 int err = errno; 728 va_list alist; 729 730 if (pname != NULL) 731 (void) fprintf(stderr, gettext(PNAME_FMT), pname); 732 733 va_start(alist, format); 734 (void) vfprintf(stderr, format, alist); 735 va_end(alist); 736 737 if (strrchr(format, '\n') == NULL) 738 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err)); 739 } 740 741 /*PRINTFLIKE1*/ 742 void 743 die(const char *format, ...) 744 { 745 int err = errno; 746 va_list alist; 747 748 if (pname != NULL) 749 (void) fprintf(stderr, gettext(PNAME_FMT), pname); 750 751 va_start(alist, format); 752 (void) vfprintf(stderr, format, alist); 753 va_end(alist); 754 755 if (strrchr(format, '\n') == NULL) 756 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err)); 757 758 exit(E_ERROR); 759 } 760 761 #ifdef DEBUG 762 /* 763 * pprintc() is called only by xdump(). 764 */ 765 #define BYTES_PER_LINE 16 766 static void 767 pprintc(FILE *fp, char c) 768 { 769 if (isspace((uchar_t)c)) 770 (void) fprintf(fp, " "); 771 else if (isprint((uchar_t)c)) 772 (void) fprintf(fp, "%c", c); 773 else 774 (void) fprintf(fp, "."); 775 } 776 777 static void 778 pprintwc(FILE *fp, wchar_t c) 779 { 780 if (iswspace(c)) 781 (void) fprintf(fp, " "); 782 else if (iswprint(c)) 783 (void) fprintf(fp, "%wc", c); 784 else 785 (void) fprintf(fp, "."); 786 } 787 788 /* 789 * xdump() is used only for debugging purposes. 790 */ 791 void 792 xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide) 793 { 794 int i; 795 size_t nc = 0; 796 uchar_t d[BYTES_PER_LINE]; 797 798 for (; nc < bufsize; buf++) { 799 d[nc % BYTES_PER_LINE] = *buf; 800 if (nc % BYTES_PER_LINE == 0) { 801 (void) fprintf(fp, "%08x:", nc); 802 } 803 (void) fprintf(fp, " %02x", *buf); 804 nc++; 805 if (nc % BYTES_PER_LINE == 0) { 806 (void) fprintf(fp, " "); 807 if (wide) { 808 for (i = 0; i < BYTES_PER_LINE; 809 i += sizeof (wchar_t)) 810 pprintwc(fp, *(wchar_t *)(d + i)); 811 } else { 812 for (i = 0; i < BYTES_PER_LINE; i++) 813 pprintc(fp, d[i]); 814 } 815 (void) fprintf(fp, "\n"); 816 } 817 } 818 819 for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++) 820 (void) fprintf(fp, " "); 821 822 (void) fprintf(fp, " "); 823 824 if (wide) { 825 for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t)) 826 pprintwc(fp, *(wchar_t *)(d + i)); 827 } else { 828 for (i = 0; i < nc % BYTES_PER_LINE; i++) 829 pprintc(fp, d[i]); 830 } 831 832 (void) fprintf(fp, "\n"); 833 } 834 #endif /* DEBUG */ 835