xref: /freebsd/usr.bin/sort/file.c (revision e39e854e27f53a784c3982cbeb68f4ad1cfd9162)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/mman.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/queue.h>
35 
36 #include <err.h>
37 #include <fcntl.h>
38 #if defined(SORT_THREADS)
39 #include <pthread.h>
40 #endif
41 #include <semaphore.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <wchar.h>
47 #include <wctype.h>
48 
49 #include "coll.h"
50 #include "file.h"
51 #include "radixsort.h"
52 
53 unsigned long long free_memory = 1000000;
54 unsigned long long available_free_memory = 1000000;
55 
56 const char *tmpdir = "/var/tmp";
57 const char *compress_program;
58 
59 size_t max_open_files = 16;
60 
61 /*
62  * How much space we read from file at once
63  */
64 #define READ_CHUNK (4096)
65 
66 /*
67  * File reader structure
68  */
69 struct file_reader
70 {
71 	struct reader_buffer	 rb;
72 	FILE			*file;
73 	char			*fname;
74 	unsigned char		*buffer;
75 	unsigned char		*mmapaddr;
76 	unsigned char		*mmapptr;
77 	size_t			 bsz;
78 	size_t			 cbsz;
79 	size_t			 mmapsize;
80 	size_t			 strbeg;
81 	int			 fd;
82 	char			 elsymb;
83 };
84 
85 /*
86  * Structure to be used in file merge process.
87  */
88 struct file_header
89 {
90 	struct file_reader		*fr;
91 	struct sort_list_item		*si; /* current top line */
92 	size_t				 file_pos;
93 };
94 
95 /*
96  * List elements of "cleanable" files list.
97  */
98 struct CLEANABLE_FILE
99 {
100 	char				*fn;
101 	LIST_ENTRY(CLEANABLE_FILE)	 files;
102 };
103 
104 /*
105  * List header of "cleanable" files list.
106  */
107 static LIST_HEAD(CLEANABLE_FILES,CLEANABLE_FILE) tmp_files;
108 
109 /*
110  * Semaphore to protect the tmp file list.
111  * We use semaphore here because it is signal-safe, according to POSIX.
112  * And semaphore does not require pthread library.
113  */
114 static sem_t tmp_files_sem;
115 
116 static void mt_sort(struct sort_list *list,
117     int (*sort_func)(void *, size_t, size_t,
118     int (*)(const void *, const void *)), const char* fn);
119 
120 /*
121  * Init tmp files list
122  */
123 void
124 init_tmp_files(void)
125 {
126 
127 	LIST_INIT(&tmp_files);
128 	sem_init(&tmp_files_sem, 0, 1);
129 }
130 
131 /*
132  * Save name of a tmp file for signal cleanup
133  */
134 void
135 tmp_file_atexit(const char *tmp_file)
136 {
137 
138 	if (tmp_file) {
139 		sem_wait(&tmp_files_sem);
140 		struct CLEANABLE_FILE *item =
141 		    sort_malloc(sizeof(struct CLEANABLE_FILE));
142 		item->fn = sort_strdup(tmp_file);
143 		LIST_INSERT_HEAD(&tmp_files, item, files);
144 		sem_post(&tmp_files_sem);
145 	}
146 }
147 
148 /*
149  * Clear tmp files
150  */
151 void
152 clear_tmp_files(void)
153 {
154 	struct CLEANABLE_FILE *item;
155 
156 	sem_wait(&tmp_files_sem);
157 	LIST_FOREACH(item,&tmp_files,files) {
158 		if ((item) && (item->fn))
159 			unlink(item->fn);
160 	}
161 	sem_post(&tmp_files_sem);
162 }
163 
164 /*
165  * Check whether a file is a temporary file
166  */
167 static bool
168 file_is_tmp(const char* fn)
169 {
170 	struct CLEANABLE_FILE *item;
171 	bool ret = false;
172 
173 	if (fn) {
174 		sem_wait(&tmp_files_sem);
175 		LIST_FOREACH(item,&tmp_files,files) {
176 			if ((item) && (item->fn))
177 				if (strcmp(item->fn, fn) == 0) {
178 					ret = true;
179 					break;
180 				}
181 		}
182 		sem_post(&tmp_files_sem);
183 	}
184 
185 	return (ret);
186 }
187 
188 /*
189  * Read zero-terminated line from a file
190  */
191 char *
192 read_file0_line(struct file0_reader *f0r)
193 {
194 	size_t pos = 0;
195 	int c;
196 
197 	if ((f0r->f == NULL) || feof(f0r->f))
198 		return (NULL);
199 
200 	if (f0r->current_line && f0r->current_sz > 0)
201 		f0r->current_line[0] = 0;
202 
203 	while (!feof(f0r->f)) {
204 		c = fgetc(f0r->f);
205 		if (feof(f0r->f) || (c == -1))
206 			break;
207 		if ((pos + 1) >= f0r->current_sz) {
208 			size_t newsz = (f0r->current_sz + 2) * 2;
209 			f0r->current_line = sort_realloc(f0r->current_line,
210 			    newsz);
211 			f0r->current_sz = newsz;
212 		}
213 		f0r->current_line[pos] = (char)c;
214 		if (c == 0)
215 			break;
216 		else
217 			f0r->current_line[pos + 1] = 0;
218 		++pos;
219 	}
220 
221 	return f0r->current_line;
222 }
223 
224 /*
225  * Generate new temporary file name
226  */
227 char *
228 new_tmp_file_name(void)
229 {
230 	static unsigned int tfcounter = 0;
231 	static const char *fn = ".bsdsort.";
232 	char *ret;
233 	size_t sz;
234 
235 	sz = strlen(tmpdir) + 1 + strlen(fn) + 32 + 1;
236 	ret = sort_malloc(sz);
237 
238 	sprintf(ret, "%s/%s%d.%u", tmpdir, fn, (int) getpid(), tfcounter++);
239 	tmp_file_atexit(ret);
240 	return (ret);
241 }
242 
243 /*
244  * Initialize file list
245  */
246 void
247 file_list_init(struct file_list *fl, bool tmp)
248 {
249 
250 	if (fl) {
251 		fl->count = 0;
252 		fl->sz = 0;
253 		fl->fns = NULL;
254 		fl->tmp = tmp;
255 	}
256 }
257 
258 /*
259  * Add a file name to the list
260  */
261 void
262 file_list_add(struct file_list *fl, char *fn, bool allocate)
263 {
264 
265 	if (fl && fn) {
266 		if (fl->count >= fl->sz || (fl->fns == NULL)) {
267 			fl->sz = (fl->sz) * 2 + 1;
268 			fl->fns = sort_realloc(fl->fns, fl->sz *
269 			    sizeof(char *));
270 		}
271 		fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn;
272 		fl->count += 1;
273 	}
274 }
275 
276 /*
277  * Populate file list from array of file names
278  */
279 void
280 file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate)
281 {
282 
283 	if (fl && argv) {
284 		int i;
285 
286 		for (i = 0; i < argc; i++)
287 			file_list_add(fl, argv[i], allocate);
288 	}
289 }
290 
291 /*
292  * Clean file list data and delete the files,
293  * if this is a list of temporary files
294  */
295 void
296 file_list_clean(struct file_list *fl)
297 {
298 
299 	if (fl) {
300 		if (fl->fns) {
301 			int i;
302 
303 			for (i = 0; i < fl->count; i++) {
304 				if (fl->fns[i]) {
305 					if (fl->tmp)
306 						unlink(fl->fns[i]);
307 					sort_free(fl->fns[i]);
308 					fl->fns[i] = 0;
309 				}
310 			}
311 			sort_free(fl->fns);
312 			fl->fns = NULL;
313 		}
314 		fl->sz = 0;
315 		fl->count = 0;
316 		fl->tmp = false;
317 	}
318 }
319 
320 /*
321  * Init sort list
322  */
323 void
324 sort_list_init(struct sort_list *l)
325 {
326 
327 	if (l) {
328 		l->count = 0;
329 		l->size = 0;
330 		l->memsize = sizeof(struct sort_list);
331 		l->list = NULL;
332 	}
333 }
334 
335 /*
336  * Add string to sort list
337  */
338 void
339 sort_list_add(struct sort_list *l, struct bwstring *str)
340 {
341 
342 	if (l && str) {
343 		size_t indx = l->count;
344 
345 		if ((l->list == NULL) || (indx >= l->size)) {
346 			int newsize = (l->size + 1) + 1024;
347 
348 			l->list = sort_realloc(l->list,
349 			    sizeof(struct sort_list_item*) * newsize);
350 			l->memsize += (newsize - l->size) *
351 			    sizeof(struct sort_list_item*);
352 			l->size = newsize;
353 		}
354 		l->list[indx] = sort_list_item_alloc();
355 		sort_list_item_set(l->list[indx], str);
356 		l->memsize += sort_list_item_size(l->list[indx]);
357 		l->count += 1;
358 	}
359 }
360 
361 /*
362  * Clean sort list data
363  */
364 void
365 sort_list_clean(struct sort_list *l)
366 {
367 
368 	if (l) {
369 		if (l->list) {
370 			size_t i;
371 
372 			for (i = 0; i < l->count; i++) {
373 				struct sort_list_item *item;
374 
375 				item = l->list[i];
376 
377 				if (item) {
378 					sort_list_item_clean(item);
379 					sort_free(item);
380 					l->list[i] = NULL;
381 				}
382 			}
383 			sort_free(l->list);
384 			l->list = NULL;
385 		}
386 		l->count = 0;
387 		l->size = 0;
388 		l->memsize = sizeof(struct sort_list);
389 	}
390 }
391 
392 /*
393  * Write sort list to file
394  */
395 void
396 sort_list_dump(struct sort_list *l, const char *fn)
397 {
398 
399 	if (l && fn) {
400 		FILE *f;
401 
402 		f = openfile(fn, "w");
403 		if (f == NULL)
404 			err(2, NULL);
405 
406 		if (l->list) {
407 			struct sort_list_item *last_printed_item;
408 			size_t i;
409 
410 			last_printed_item = NULL;
411 
412 			for (i = 0; i < l->count; i++) {
413 				struct sort_list_item *item;
414 
415 				item = l->list[i];
416 
417 				if (!(sort_opts_vals.uflag) ||
418 				    (last_printed_item == NULL) ||
419 				    list_coll(&last_printed_item, &item)) {
420 					bwsfwrite(item->str, f,
421 					    sort_opts_vals.zflag);
422 					if (sort_opts_vals.uflag)
423 						last_printed_item = item;
424 				}
425 			}
426 		}
427 
428 		closefile(f, fn);
429 	}
430 }
431 
432 /*
433  * Checks if the given file is sorted.  Stops at the first disorder,
434  * prints the disordered line and returns 1.
435  */
436 int
437 check(const char *fn)
438 {
439 	struct bwstring *s1, *s2, *s1disorder, *s2disorder;
440 	struct file_reader *fr;
441 	struct keys_array *ka1, *ka2;
442 	int res, pos, posdisorder;
443 
444 	s1 = s2 = s1disorder = s2disorder = NULL;
445 	ka1 = ka2 = NULL;
446 
447 	fr = file_reader_init(fn);
448 
449 	res = 0;
450 	pos = 1;
451 	posdisorder = 1;
452 
453 	if (fr == NULL) {
454 		err(2, NULL);
455 		goto end;
456 	}
457 
458 	s1 = file_reader_readline(fr);
459 	if (s1 == NULL)
460 		goto end;
461 
462 	ka1 = keys_array_alloc();
463 	preproc(s1, ka1);
464 
465 	s2 = file_reader_readline(fr);
466 	if (s2 == NULL)
467 		goto end;
468 
469 	ka2 = keys_array_alloc();
470 	preproc(s2, ka2);
471 
472 	for (;;) {
473 
474 		if (debug_sort) {
475 			bwsprintf(stdout, s2, "s1=<", ">");
476 			bwsprintf(stdout, s1, "s2=<", ">");
477 		}
478 		int cmp = key_coll(ka2, ka1, 0);
479 		if (debug_sort)
480 			printf("; cmp1=%d", cmp);
481 
482 		if (!cmp && sort_opts_vals.complex_sort &&
483 		    !(sort_opts_vals.uflag) && !(sort_opts_vals.sflag)) {
484 			cmp = top_level_str_coll(s2, s1);
485 			if (debug_sort)
486 				printf("; cmp2=%d", cmp);
487 		}
488 		if (debug_sort)
489 			printf("\n");
490 
491 		if ((sort_opts_vals.uflag && (cmp <= 0)) || (cmp < 0)) {
492 			if (!(sort_opts_vals.csilentflag)) {
493 				s2disorder = bwsdup(s2);
494 				posdisorder = pos;
495 				if (debug_sort)
496 					s1disorder = bwsdup(s1);
497 			}
498 			res = 1;
499 			goto end;
500 		}
501 
502 		pos++;
503 
504 		clean_keys_array(s1, ka1);
505 		sort_free(ka1);
506 		ka1 = ka2;
507 		ka2 = NULL;
508 
509 		bwsfree(s1);
510 		s1 = s2;
511 
512 		s2 = file_reader_readline(fr);
513 		if (s2 == NULL)
514 			goto end;
515 
516 		ka2 = keys_array_alloc();
517 		preproc(s2, ka2);
518 	}
519 
520 end:
521 	if (ka1) {
522 		clean_keys_array(s1, ka1);
523 		sort_free(ka1);
524 	}
525 
526 	if (s1)
527 		bwsfree(s1);
528 
529 	if (ka2) {
530 		clean_keys_array(s2, ka2);
531 		sort_free(ka2);
532 	}
533 
534 	if (s2)
535 		bwsfree(s2);
536 
537 	if ((fn == NULL) || (*fn == 0) || (strcmp(fn, "-") == 0)) {
538 		for (;;) {
539 			s2 = file_reader_readline(fr);
540 			if (s2 == NULL)
541 				break;
542 			bwsfree(s2);
543 		}
544 	}
545 
546 	file_reader_free(fr);
547 
548 	if (s2disorder) {
549 		bws_disorder_warnx(s2disorder, fn, posdisorder);
550 		if (s1disorder) {
551 			bws_disorder_warnx(s1disorder, fn, posdisorder);
552 			if (s1disorder != s2disorder)
553 				bwsfree(s1disorder);
554 		}
555 		bwsfree(s2disorder);
556 		s1disorder = NULL;
557 		s2disorder = NULL;
558 	}
559 
560 	if (res)
561 		exit(res);
562 
563 	return (0);
564 }
565 
566 /*
567  * Opens a file.  If the given filename is "-", stdout will be
568  * opened.
569  */
570 FILE *
571 openfile(const char *fn, const char *mode)
572 {
573 	FILE *file;
574 
575 	if (strcmp(fn, "-") == 0) {
576 		return ((mode && mode[0] == 'r') ? stdin : stdout);
577 	} else {
578 		mode_t orig_file_mask = 0;
579 		int is_tmp = file_is_tmp(fn);
580 
581 		if (is_tmp && (mode[0] == 'w'))
582 			orig_file_mask = umask(S_IWGRP | S_IWOTH |
583 			    S_IRGRP | S_IROTH);
584 
585 		if (is_tmp && (compress_program != NULL)) {
586 			char *cmd;
587 			size_t cmdsz;
588 
589 			cmdsz = strlen(fn) + 128;
590 			cmd = sort_malloc(cmdsz);
591 
592 			fflush(stdout);
593 
594 			if (mode[0] == 'r')
595 				snprintf(cmd, cmdsz - 1, "cat %s | %s -d",
596 				    fn, compress_program);
597 			else if (mode[0] == 'w')
598 				snprintf(cmd, cmdsz - 1, "%s > %s",
599 				    compress_program, fn);
600 			else
601 				err(2, "%s", getstr(7));
602 
603 			if ((file = popen(cmd, mode)) == NULL)
604 				err(2, NULL);
605 
606 			sort_free(cmd);
607 
608 		} else
609 			if ((file = fopen(fn, mode)) == NULL)
610 				err(2, NULL);
611 
612 		if (is_tmp && (mode[0] == 'w'))
613 			umask(orig_file_mask);
614 	}
615 
616 	return (file);
617 }
618 
619 /*
620  * Close file
621  */
622 void
623 closefile(FILE *f, const char *fn)
624 {
625 	if (f == NULL) {
626 		;
627 	} else if (f == stdin) {
628 		;
629 	} else if (f == stdout) {
630 		fflush(f);
631 	} else {
632 		if (file_is_tmp(fn) && compress_program != NULL) {
633 			if(pclose(f)<0)
634 				err(2,NULL);
635 		} else
636 			fclose(f);
637 	}
638 }
639 
640 /*
641  * Reads a file into the internal buffer.
642  */
643 struct file_reader *
644 file_reader_init(const char *fsrc)
645 {
646 	struct file_reader *ret;
647 
648 	if (fsrc == NULL)
649 		fsrc = "-";
650 
651 	ret = sort_malloc(sizeof(struct file_reader));
652 	memset(ret, 0, sizeof(struct file_reader));
653 
654 	ret->elsymb = '\n';
655 	if (sort_opts_vals.zflag)
656 		ret->elsymb = 0;
657 
658 	ret->fname = sort_strdup(fsrc);
659 
660 	if (strcmp(fsrc, "-") && (compress_program == NULL)) {
661 
662 		do {
663 			struct stat stat_buf;
664 			void *addr;
665 			size_t sz = 0;
666 			int fd, flags;
667 
668 			flags = MAP_NOCORE | MAP_NOSYNC;
669 			addr = MAP_FAILED;
670 
671 			fd = open(fsrc, O_RDONLY);
672 			if (fd < 0)
673 				err(2, NULL);
674 
675 			if (fstat(fd, &stat_buf) < 0) {
676 				close(fd);
677 				break;
678 			}
679 
680 			sz = stat_buf.st_size;
681 
682 #if defined(MAP_PREFAULT_READ)
683 			flags |= MAP_PREFAULT_READ;
684 #endif
685 
686 			addr = mmap(NULL, sz, PROT_READ, flags, fd, 0);
687 			if (addr == MAP_FAILED) {
688 				close(fd);
689 				break;
690 			}
691 
692 			ret->fd = fd;
693 			ret->mmapaddr = addr;
694 			ret->mmapsize = sz;
695 			ret->mmapptr = ret->mmapaddr;
696 
697 		} while (0);
698 	}
699 
700 	if (ret->mmapaddr == NULL) {
701 		ret->file = openfile(fsrc, "r");
702 		if (ret->file == NULL)
703 			err(2, NULL);
704 
705 		if (strcmp(fsrc, "-")) {
706 			ret->cbsz = READ_CHUNK;
707 			ret->buffer = sort_malloc(ret->cbsz);
708 			ret->bsz = 0;
709 			ret->strbeg = 0;
710 
711 			ret->bsz = fread(ret->buffer, 1, ret->cbsz, ret->file);
712 			if (ret->bsz == 0) {
713 				if (ferror(ret->file))
714 					err(2, NULL);
715 			}
716 		}
717 	}
718 
719 	return (ret);
720 }
721 
722 struct bwstring *
723 file_reader_readline(struct file_reader *fr)
724 {
725 	struct bwstring *ret = NULL;
726 
727 	if (fr->mmapaddr) {
728 		unsigned char *mmapend;
729 
730 		mmapend = fr->mmapaddr + fr->mmapsize;
731 		if (fr->mmapptr >= mmapend)
732 			return (NULL);
733 		else {
734 			unsigned char *strend;
735 			size_t sz;
736 
737 			sz = mmapend - fr->mmapptr;
738 			strend = memchr(fr->mmapptr, fr->elsymb, sz);
739 
740 			if (strend == NULL) {
741 				ret = bwscsbdup(fr->mmapptr, sz);
742 				fr->mmapptr = mmapend;
743 			} else {
744 				ret = bwscsbdup(fr->mmapptr, strend -
745 				    fr->mmapptr);
746 				fr->mmapptr = strend + 1;
747 			}
748 		}
749 
750 	} else if (fr->file != stdin) {
751 		unsigned char *strend;
752 		size_t bsz1, remsz, search_start;
753 
754 		search_start = 0;
755 		remsz = 0;
756 		strend = NULL;
757 
758 		if (fr->bsz > fr->strbeg)
759 			remsz = fr->bsz - fr->strbeg;
760 
761 		/* line read cycle */
762 		for (;;) {
763 			if (remsz > search_start)
764 				strend = memchr(fr->buffer + fr->strbeg +
765 				    search_start, fr->elsymb, remsz -
766 				    search_start);
767 			else
768 				strend = NULL;
769 
770 			if (strend)
771 				break;
772 			if (feof(fr->file))
773 				break;
774 
775 			if (fr->bsz != fr->cbsz)
776 				/* NOTREACHED */
777 				err(2, "File read software error 1");
778 
779 			if (remsz > (READ_CHUNK >> 1)) {
780 				search_start = fr->cbsz - fr->strbeg;
781 				fr->cbsz += READ_CHUNK;
782 				fr->buffer = sort_realloc(fr->buffer,
783 				    fr->cbsz);
784 				bsz1 = fread(fr->buffer + fr->bsz, 1,
785 				    READ_CHUNK, fr->file);
786 				if (bsz1 == 0) {
787 					if (ferror(fr->file))
788 						err(2, NULL);
789 					break;
790 				}
791 				fr->bsz += bsz1;
792 				remsz += bsz1;
793 			} else {
794 				if (remsz > 0 && fr->strbeg>0)
795 					bcopy(fr->buffer + fr->strbeg,
796 					    fr->buffer, remsz);
797 
798 				fr->strbeg = 0;
799 				search_start = remsz;
800 				bsz1 = fread(fr->buffer + remsz, 1,
801 				    fr->cbsz - remsz, fr->file);
802 				if (bsz1 == 0) {
803 					if (ferror(fr->file))
804 						err(2, NULL);
805 					break;
806 				}
807 				fr->bsz = remsz + bsz1;
808 				remsz = fr->bsz;
809 			}
810 		}
811 
812 		if (strend == NULL)
813 			strend = fr->buffer + fr->bsz;
814 
815 		if ((fr->buffer + fr->strbeg <= strend) &&
816 		    (fr->strbeg < fr->bsz) && (remsz>0))
817 			ret = bwscsbdup(fr->buffer + fr->strbeg, strend -
818 			    fr->buffer - fr->strbeg);
819 
820 		fr->strbeg = (strend - fr->buffer) + 1;
821 
822 	} else {
823 		size_t len = 0;
824 
825 		ret = bwsfgetln(fr->file, &len, sort_opts_vals.zflag,
826 		    &(fr->rb));
827 	}
828 
829 	return (ret);
830 }
831 
832 static void
833 file_reader_clean(struct file_reader *fr)
834 {
835 
836 	if (fr) {
837 		if (fr->mmapaddr)
838 			munmap(fr->mmapaddr, fr->mmapsize);
839 
840 		if (fr->fd)
841 			close(fr->fd);
842 
843 		if (fr->buffer)
844 			sort_free(fr->buffer);
845 
846 		if (fr->file)
847 			if (fr->file != stdin)
848 				closefile(fr->file, fr->fname);
849 
850 		if(fr->fname)
851 			sort_free(fr->fname);
852 
853 		memset(fr, 0, sizeof(struct file_reader));
854 	}
855 }
856 
857 void
858 file_reader_free(struct file_reader *fr)
859 {
860 
861 	if (fr) {
862 		file_reader_clean(fr);
863 		sort_free(fr);
864 	}
865 }
866 
867 int
868 procfile(const char *fsrc, struct sort_list *list, struct file_list *fl)
869 {
870 	struct file_reader *fr;
871 
872 	fr = file_reader_init(fsrc);
873 	if (fr == NULL)
874 		err(2, NULL);
875 
876 	/* file browse cycle */
877 	for (;;) {
878 		struct bwstring *bws;
879 
880 		bws = file_reader_readline(fr);
881 
882 		if (bws == NULL)
883 			break;
884 
885 		sort_list_add(list, bws);
886 
887 		if (list->memsize >= available_free_memory) {
888 			char *fn;
889 
890 			fn = new_tmp_file_name();
891 			sort_list_to_file(list, fn);
892 			file_list_add(fl, fn, false);
893 			sort_list_clean(list);
894 		}
895 	}
896 
897 	file_reader_free(fr);
898 
899 	return (0);
900 }
901 
902 /*
903  * Compare file headers. Files with EOF always go to the end of the list.
904  */
905 static int
906 file_header_cmp(struct file_header *f1, struct file_header *f2)
907 {
908 
909 	if (f1 == f2)
910 		return (0);
911 	else {
912 		if (f1->fr == NULL) {
913 			return ((f2->fr == NULL) ? 0 : +1);
914 		} else if (f2->fr == NULL)
915 			return (-1);
916 		else {
917 			int ret;
918 
919 			ret = list_coll(&(f1->si), &(f2->si));
920 			if (!ret)
921 				return ((f1->file_pos < f2->file_pos) ? -1 : +1);
922 			return (ret);
923 		}
924 	}
925 }
926 
927 /*
928  * Allocate and init file header structure
929  */
930 static void
931 file_header_init(struct file_header **fh, const char *fn, size_t file_pos)
932 {
933 
934 	if (fh && fn) {
935 		struct bwstring *line;
936 
937 		*fh = sort_malloc(sizeof(struct file_header));
938 		(*fh)->file_pos = file_pos;
939 		(*fh)->fr = file_reader_init(fn);
940 		if ((*fh)->fr == NULL) {
941 			perror(fn);
942 			err(2, "%s", getstr(8));
943 		}
944 		line = file_reader_readline((*fh)->fr);
945 		if (line == NULL) {
946 			file_reader_free((*fh)->fr);
947 			(*fh)->fr = NULL;
948 			(*fh)->si = NULL;
949 		} else {
950 			(*fh)->si = sort_list_item_alloc();
951 			sort_list_item_set((*fh)->si, line);
952 		}
953 	}
954 }
955 
956 /*
957  * Close file
958  */
959 static void
960 file_header_close(struct file_header **fh)
961 {
962 
963 	if (fh && *fh) {
964 		if ((*fh)->fr) {
965 			file_reader_free((*fh)->fr);
966 			(*fh)->fr = NULL;
967 		}
968 		if ((*fh)->si) {
969 			sort_list_item_clean((*fh)->si);
970 			sort_free((*fh)->si);
971 			(*fh)->si = NULL;
972 		}
973 		sort_free(*fh);
974 		*fh = NULL;
975 	}
976 }
977 
978 /*
979  * Swap two array elements
980  */
981 static void
982 file_header_swap(struct file_header **fh, int i1, int i2)
983 {
984 	struct file_header *tmp;
985 
986 	tmp = fh[i1];
987 	fh[i1] = fh[i2];
988 	fh[i2] = tmp;
989 }
990 
991 /* heap algorithm ==>> */
992 
993 /*
994  * See heap sort algorithm
995  * "Raises" last element to its right place
996  */
997 static void
998 file_header_heap_swim(struct file_header **fh, int indx)
999 {
1000 
1001 	if (indx > 0) {
1002 		int parent_index;
1003 
1004 		parent_index = (indx - 1) >> 1;
1005 
1006 		if (file_header_cmp(fh[indx], fh[parent_index]) < 0) {
1007 			/* swap child and parent and continue */
1008 			file_header_swap(fh, indx, parent_index);
1009 			file_header_heap_swim(fh, parent_index);
1010 		}
1011 	}
1012 }
1013 
1014 /*
1015  * Sink the top element to its correct position
1016  */
1017 static void
1018 file_header_heap_sink(struct file_header **fh, int indx, int size)
1019 {
1020 	int left_child_index;
1021 	int right_child_index;
1022 
1023 	left_child_index = indx + indx + 1;
1024 	right_child_index = left_child_index + 1;
1025 
1026 	if (left_child_index < size) {
1027 		int min_child_index;
1028 
1029 		min_child_index = left_child_index;
1030 
1031 		if ((right_child_index < size) &&
1032 		    (file_header_cmp(fh[left_child_index],
1033 		    fh[right_child_index]) > 0))
1034 			min_child_index = right_child_index;
1035 		if (file_header_cmp(fh[indx], fh[min_child_index]) > 0) {
1036 			file_header_swap(fh, indx, min_child_index);
1037 			file_header_heap_sink(fh, min_child_index, size);
1038 		}
1039 	}
1040 }
1041 
1042 /* <<== heap algorithm */
1043 
1044 /*
1045  * Adds element to the "left" end
1046  */
1047 static void
1048 file_header_list_rearrange_from_header(struct file_header **fh, int size)
1049 {
1050 
1051 	file_header_heap_sink(fh, 0, size);
1052 }
1053 
1054 /*
1055  * Adds element to the "right" end
1056  */
1057 static void
1058 file_header_list_push(struct file_header *f, struct file_header **fh, int size)
1059 {
1060 
1061 	fh[size++] = f;
1062 	file_header_heap_swim(fh, size - 1);
1063 }
1064 
1065 struct last_printed
1066 {
1067 	struct bwstring *str;
1068 };
1069 
1070 /*
1071  * Prints the current line of the file
1072  */
1073 static void
1074 file_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp)
1075 {
1076 
1077 	if (fh && fh->fr && f_out && fh->si && fh->si->str) {
1078 		if (sort_opts_vals.uflag) {
1079 			if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) {
1080 				bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
1081 				if (lp->str)
1082 					bwsfree(lp->str);
1083 				lp->str = bwsdup(fh->si->str);
1084 			}
1085 		} else
1086 			bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
1087 	}
1088 }
1089 
1090 /*
1091  * Read next line
1092  */
1093 static void
1094 file_header_read_next(struct file_header *fh)
1095 {
1096 
1097 	if (fh && fh->fr) {
1098 		struct bwstring *tmp;
1099 
1100 		tmp = file_reader_readline(fh->fr);
1101 		if (tmp == NULL) {
1102 			file_reader_free(fh->fr);
1103 			fh->fr = NULL;
1104 			if (fh->si) {
1105 				sort_list_item_clean(fh->si);
1106 				sort_free(fh->si);
1107 				fh->si = NULL;
1108 			}
1109 		} else {
1110 			if (fh->si == NULL)
1111 				fh->si = sort_list_item_alloc();
1112 			sort_list_item_set(fh->si, tmp);
1113 		}
1114 	}
1115 }
1116 
1117 /*
1118  * Merge array of "files headers"
1119  */
1120 static void
1121 file_headers_merge(int fnum, struct file_header **fh, FILE *f_out)
1122 {
1123 	struct last_printed lp;
1124 	int i;
1125 
1126 	memset(&lp, 0, sizeof(lp));
1127 
1128 	/*
1129 	 * construct the initial sort structure
1130 	 */
1131 	for (i = 0; i < fnum; i++)
1132 		file_header_list_push(fh[i], fh, i);
1133 
1134 	while (fh[0]->fr) { /* unfinished files are always in front */
1135 		/* output the smallest line: */
1136 		file_header_print(fh[0], f_out, &lp);
1137 		/* read a new line, if possible: */
1138 		file_header_read_next(fh[0]);
1139 		/* re-arrange the list: */
1140 		file_header_list_rearrange_from_header(fh, fnum);
1141 	}
1142 
1143 	if (lp.str)
1144 		bwsfree(lp.str);
1145 }
1146 
1147 /*
1148  * Merges the given files into the output file, which can be
1149  * stdout.
1150  */
1151 static void
1152 merge_files_array(int argc, char **argv, const char *fn_out)
1153 {
1154 
1155 	if (argv && fn_out) {
1156 		struct file_header **fh;
1157 		FILE *f_out;
1158 		int i;
1159 
1160 		f_out = openfile(fn_out, "w");
1161 
1162 		if (f_out == NULL)
1163 			err(2, NULL);
1164 
1165 		fh = sort_malloc((argc + 1) * sizeof(struct file_header *));
1166 
1167 		for (i = 0; i < argc; i++)
1168 			file_header_init(fh + i, argv[i], (size_t) i);
1169 
1170 		file_headers_merge(argc, fh, f_out);
1171 
1172 		for (i = 0; i < argc; i++)
1173 			file_header_close(fh + i);
1174 
1175 		sort_free(fh);
1176 
1177 		closefile(f_out, fn_out);
1178 	}
1179 }
1180 
1181 /*
1182  * Shrinks the file list until its size smaller than max number of opened files
1183  */
1184 static int
1185 shrink_file_list(struct file_list *fl)
1186 {
1187 
1188 	if ((fl == NULL) || (size_t) (fl->count) < max_open_files)
1189 		return (0);
1190 	else {
1191 		struct file_list new_fl;
1192 		int indx = 0;
1193 
1194 		file_list_init(&new_fl, true);
1195 		while (indx < fl->count) {
1196 			char *fnew;
1197 			int num;
1198 
1199 			num = fl->count - indx;
1200 			fnew = new_tmp_file_name();
1201 
1202 			if ((size_t) num >= max_open_files)
1203 				num = max_open_files - 1;
1204 			merge_files_array(num, fl->fns + indx, fnew);
1205 			if (fl->tmp) {
1206 				int i;
1207 
1208 				for (i = 0; i < num; i++)
1209 					unlink(fl->fns[indx + i]);
1210 			}
1211 			file_list_add(&new_fl, fnew, false);
1212 			indx += num;
1213 		}
1214 		fl->tmp = false; /* already taken care of */
1215 		file_list_clean(fl);
1216 
1217 		fl->count = new_fl.count;
1218 		fl->fns = new_fl.fns;
1219 		fl->sz = new_fl.sz;
1220 		fl->tmp = new_fl.tmp;
1221 
1222 		return (1);
1223 	}
1224 }
1225 
1226 /*
1227  * Merge list of files
1228  */
1229 void
1230 merge_files(struct file_list *fl, const char *fn_out)
1231 {
1232 
1233 	if (fl && fn_out) {
1234 		while (shrink_file_list(fl));
1235 
1236 		merge_files_array(fl->count, fl->fns, fn_out);
1237 	}
1238 }
1239 
1240 static const char *
1241 get_sort_method_name(int sm)
1242 {
1243 
1244 	if (sm == SORT_MERGESORT)
1245 		return "mergesort";
1246 	else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
1247 		return "radixsort";
1248 	else if (sort_opts_vals.sort_method == SORT_HEAPSORT)
1249 		return "heapsort";
1250 	else
1251 		return "quicksort";
1252 }
1253 
1254 /*
1255  * Wrapper for qsort
1256  */
1257 static int sort_qsort(void *list, size_t count, size_t elem_size,
1258     int (*cmp_func)(const void *, const void *))
1259 {
1260 
1261 	qsort(list, count, elem_size, cmp_func);
1262 	return (0);
1263 }
1264 
1265 /*
1266  * Sort list of lines and writes it to the file
1267  */
1268 void
1269 sort_list_to_file(struct sort_list *list, const char *outfile)
1270 {
1271 	struct sort_mods *sm = &(keys[0].sm);
1272 
1273 	if (!(sm->Mflag) && !(sm->Rflag) && !(sm->Vflag) && !(sm->Vflag) &&
1274 	    !(sm->gflag) && !(sm->hflag) && !(sm->nflag)) {
1275 		if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort)
1276 			sort_opts_vals.sort_method = SORT_RADIXSORT;
1277 
1278 	} else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
1279 		err(2, "%s", getstr(9));
1280 
1281 	/*
1282 	 * to handle stable sort and the unique cases in the
1283 	 * right order, we need stable basic algorithm
1284 	 */
1285 	if (sort_opts_vals.sflag) {
1286 		switch (sort_opts_vals.sort_method){
1287 		case SORT_MERGESORT:
1288 			break;
1289 		case SORT_RADIXSORT:
1290 			break;
1291 		case SORT_DEFAULT:
1292 			sort_opts_vals.sort_method = SORT_MERGESORT;
1293 			break;
1294 		default:
1295 			errx(2, "%s", getstr(10));
1296 		};
1297 	}
1298 
1299 	if (sort_opts_vals.sort_method == SORT_DEFAULT)
1300 		sort_opts_vals.sort_method = SORT_QSORT;
1301 
1302 	if (debug_sort)
1303 		printf("sort_method=%s\n",
1304 		    get_sort_method_name(sort_opts_vals.sort_method));
1305 
1306 	switch (sort_opts_vals.sort_method){
1307 	case SORT_RADIXSORT:
1308 		rxsort(list->list, list->count);
1309 		sort_list_dump(list, outfile);
1310 		break;
1311 	case SORT_MERGESORT:
1312 		mt_sort(list, mergesort, outfile);
1313 		break;
1314 	case SORT_HEAPSORT:
1315 		mt_sort(list, heapsort,	outfile);
1316 		break;
1317 	default:
1318 		mt_sort(list, sort_qsort, outfile);
1319 		break;
1320 	}
1321 }
1322 
1323 /******************* MT SORT ************************/
1324 
1325 #if defined(SORT_THREADS)
1326 /* semaphore to count threads */
1327 static sem_t mtsem;
1328 
1329 /* current system sort function */
1330 static int (*g_sort_func)(void *, size_t, size_t,
1331     int(*)(const void *, const void *));
1332 
1333 /*
1334  * Sort cycle thread (in multi-threaded mode)
1335  */
1336 static void*
1337 mt_sort_thread(void* arg)
1338 {
1339 	struct sort_list *list = arg;
1340 
1341 	g_sort_func(list->list, list->count, sizeof(struct sort_list_item *),
1342 	    (int(*)(const void *, const void *)) list_coll);
1343 
1344 	sem_post(&mtsem);
1345 
1346 	return (arg);
1347 }
1348 
1349 /*
1350  * Compare sub-lists. Empty sub-lists always go to the end of the list.
1351  */
1352 static int
1353 sub_list_cmp(struct sort_list *l1, struct sort_list *l2)
1354 {
1355 
1356 	if (l1 == l2)
1357 		return (0);
1358 	else {
1359 		if (l1->count == 0) {
1360 			return ((l2->count == 0) ? 0 : +1);
1361 		} else if (l2->count == 0) {
1362 			return (-1);
1363 		} else {
1364 			int ret;
1365 
1366 			ret = list_coll(&(l1->list[0]), &(l2->list[0]));
1367 			if (!ret)
1368 				return ((l1->sub_list_pos < l2->sub_list_pos) ?
1369 				    -1 : +1);
1370 			return (ret);
1371 		}
1372 	}
1373 }
1374 
1375 /*
1376  * Swap two array elements
1377  */
1378 static void
1379 sub_list_swap(struct sort_list **sl, int i1, int i2)
1380 {
1381 	struct sort_list *tmp;
1382 
1383 	tmp = sl[i1];
1384 	sl[i1] = sl[i2];
1385 	sl[i2] = tmp;
1386 }
1387 
1388 /* heap algorithm ==>> */
1389 
1390 /*
1391  * See heap sort algorithm
1392  * "Raises" last element to its right place
1393  */
1394 static void
1395 sub_list_swim(struct sort_list **sl, int indx)
1396 {
1397 
1398 	if (indx > 0) {
1399 		int parent_index;
1400 
1401 		parent_index = (indx - 1) >> 1;
1402 
1403 		if (sub_list_cmp(sl[indx], sl[parent_index]) < 0) {
1404 			/* swap child and parent and continue */
1405 			sub_list_swap(sl, indx, parent_index);
1406 			sub_list_swim(sl, parent_index);
1407 		}
1408 	}
1409 }
1410 
1411 /*
1412  * Sink the top element to its correct position
1413  */
1414 static void
1415 sub_list_sink(struct sort_list **sl, int indx, int size)
1416 {
1417 	int left_child_index;
1418 	int right_child_index;
1419 
1420 	left_child_index = indx + indx + 1;
1421 	right_child_index = left_child_index + 1;
1422 
1423 	if (left_child_index < size) {
1424 		int min_child_index;
1425 
1426 		min_child_index = left_child_index;
1427 
1428 		if ((right_child_index < size) &&
1429 		    (sub_list_cmp(sl[left_child_index],
1430 		    sl[right_child_index]) > 0))
1431 			min_child_index = right_child_index;
1432 		if (sub_list_cmp(sl[indx], sl[min_child_index]) > 0) {
1433 			sub_list_swap(sl, indx, min_child_index);
1434 			sub_list_sink(sl, min_child_index, size);
1435 		}
1436 	}
1437 }
1438 
1439 /* <<== heap algorithm */
1440 
1441 /*
1442  * Adds element to the "right" end
1443  */
1444 static void
1445 sub_list_push(struct sort_list *s, struct sort_list **sl, int size)
1446 {
1447 
1448 	sl[size++] = s;
1449 	sub_list_swim(sl, size - 1);
1450 }
1451 
1452 struct last_printed_item
1453 {
1454 	struct sort_list_item *item;
1455 };
1456 
1457 /*
1458  * Prints the current line of the file
1459  */
1460 static void
1461 sub_list_header_print(struct sort_list *sl, FILE *f_out,
1462     struct last_printed_item *lp)
1463 {
1464 
1465 	if (sl && sl->count && f_out && sl->list[0]->str) {
1466 		if (sort_opts_vals.uflag) {
1467 			if ((lp->item == NULL) || (list_coll(&(lp->item),
1468 			    &(sl->list[0])))) {
1469 				bwsfwrite(sl->list[0]->str, f_out,
1470 				    sort_opts_vals.zflag);
1471 				lp->item = sl->list[0];
1472 			}
1473 		} else
1474 			bwsfwrite(sl->list[0]->str, f_out,
1475 			    sort_opts_vals.zflag);
1476 	}
1477 }
1478 
1479 /*
1480  * Read next line
1481  */
1482 static void
1483 sub_list_next(struct sort_list *sl)
1484 {
1485 
1486 	if (sl && sl->count) {
1487 		sl->list += 1;
1488 		sl->count -= 1;
1489 	}
1490 }
1491 
1492 /*
1493  * Merge sub-lists to a file
1494  */
1495 static void
1496 merge_sub_lists(struct sort_list **sl, size_t n, FILE* f_out)
1497 {
1498 	struct last_printed_item lp;
1499 	size_t i;
1500 
1501 	memset(&lp,0,sizeof(lp));
1502 
1503 	/* construct the initial list: */
1504 	for (i = 0; i < n; i++)
1505 		sub_list_push(sl[i], sl, i);
1506 
1507 	while (sl[0]->count) { /* unfinished lists are always in front */
1508 		/* output the smallest line: */
1509 		sub_list_header_print(sl[0], f_out, &lp);
1510 		/* move to a new line, if possible: */
1511 		sub_list_next(sl[0]);
1512 		/* re-arrange the list: */
1513 		sub_list_sink(sl, 0, n);
1514 	}
1515 }
1516 
1517 /*
1518  * Merge sub-lists to a file
1519  */
1520 static void
1521 merge_list_parts(struct sort_list **parts, size_t n, const char *fn)
1522 {
1523 	FILE* f_out;
1524 
1525 	f_out = openfile(fn,"w");
1526 
1527 	merge_sub_lists(parts, n, f_out);
1528 
1529 	closefile(f_out, fn);
1530 }
1531 
1532 #endif /* defined(SORT_THREADS) */
1533 /*
1534  * Multi-threaded sort algorithm "driver"
1535  */
1536 static void
1537 mt_sort(struct sort_list *list,
1538     int(*sort_func)(void *, size_t, size_t, int(*)(const void *, const void *)),
1539     const char* fn)
1540 {
1541 #if defined(SORT_THREADS)
1542 	if (nthreads < 2 || list->count < nthreads) {
1543 #endif
1544 		/* if single thread or small data, do simple sort */
1545 		sort_func(list->list, list->count,
1546 		    sizeof(struct sort_list_item *),
1547 		    (int(*)(const void *, const void *)) list_coll);
1548 		sort_list_dump(list, fn);
1549 #if defined(SORT_THREADS)
1550 	} else {
1551 		/* multi-threaded sort */
1552 		struct sort_list **parts;
1553 		size_t avgsize, cstart, i;
1554 
1555 		/* array of sub-lists */
1556 		parts = sort_malloc(sizeof(struct sort_list*) * nthreads);
1557 		cstart = 0;
1558 		avgsize = list->count / nthreads;
1559 
1560 		/* set global system sort function */
1561 		g_sort_func = sort_func;
1562 
1563 		/* set sublists */
1564 		for (i = 0; i < nthreads; ++i) {
1565 			size_t sz = 0;
1566 
1567 			parts[i] = sort_malloc(sizeof(struct sort_list));
1568 			parts[i]->list = list->list + cstart;
1569 			parts[i]->memsize = 0;
1570 			parts[i]->sub_list_pos = i;
1571 
1572 			sz = (i == nthreads - 1) ? list->count - cstart :
1573 			    avgsize;
1574 
1575 			parts[i]->count = sz;
1576 
1577 			parts[i]->size = parts[i]->count;
1578 
1579 			cstart += sz;
1580 		}
1581 
1582 		/* init threads counting semaphore */
1583 		sem_init(&mtsem, 0, 0);
1584 
1585 		/* start threads */
1586 		for (i = 0; i < nthreads; ++i) {
1587 			pthread_t pth;
1588 			pthread_attr_t attr;
1589 
1590 			pthread_attr_init(&attr);
1591 			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
1592 
1593 			pthread_create(&pth, &attr, mt_sort_thread, parts[i]);
1594 
1595 			pthread_attr_destroy(&attr);
1596 		}
1597 
1598 		/* wait for threads completion */
1599 		for (i = 0; i < nthreads; ++i) {
1600 			sem_wait(&mtsem);
1601 		}
1602 		/* destroy the semaphore - we do not need it anymore */
1603 		sem_destroy(&mtsem);
1604 
1605 		/* merge sorted sub-lists to the file */
1606 		merge_list_parts(parts, nthreads, fn);
1607 
1608 		/* free sub-lists data */
1609 		for (i = 0; i < nthreads; ++i) {
1610 			sort_free(parts[i]);
1611 		}
1612 		sort_free(parts);
1613 	}
1614 #endif /* defined(SORT_THREADS) */
1615 }
1616