xref: /illumos-gate/usr/src/cmd/sort/main.c (revision 0f9b8dcfdb872a210003f6b077d091b793c24a6e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Overview of sort(1)
29  *
30  * sort(1) implements a robust sorting program, compliant with the POSIX
31  * specifications for sort, that is capable of handling large sorts and merges
32  * in single byte and multibyte locales.  Like most sort(1) implementations,
33  * this implementation uses an internal algorithm for sorting subsets of the
34  * requested data set and an external algorithm for sorting the subsets into the
35  * final output.  In the current implementation, the internal algorithm is a
36  * ternary radix quicksort, modified from the algorithm described in Bentley and
37  * Sedgewick [1], while the external algorithm is a priority-queue based
38  * heapsort, as outlined in Sedgewick [2].
39  *
40  * We use three major datatypes, defined in ./types.h: the line record,
41  * line_rec_t; the stream, stream_t; and the field definition, field_t.
42  * Because sort supports efficient code paths for each of the C, single-byte,
43  * and wide character/multibyte locales, each of these types contains unions
44  * and/or function pointers to describe appropriate properties or operations for
45  * each locale type.
46  *
47  * To utilize the radix quicksort algorithm with the potentially complex sort
48  * keys definable via the POSIX standard, we convert each line to a collatable
49  * string based on the key definition.  This approach is somewhat different from
50  * historical implementations of sort(1), which have built a complex
51  * field-by-field comparison function.  There are, of course, tradeoffs that
52  * accompany this decision, particularly when the duration of use of a given
53  * collated form is short.  However, the maintenance costs of parallel
54  * conversion and collation functions are estimated to be high, and the
55  * performance costs of a shared set of functions were found to be excessive in
56  * prototype.
57  *
58  * [1]	J. Bentley and R. Sedgewick, Fast Algorithms for Sorting and Searching
59  *	Strings, in Eighth Annual ACM-SIAM Symposium on Discrete Algorithms,
60  *	1997 (SODA 1997),
61  * [2]	R. Sedgewick, Algorithms in C, 3rd ed., vol. 1, Addison-Wesley, 1998.
62  */
63 
64 #include "main.h"
65 
66 static sort_t S;
67 
68 int
69 main(int argc, char *argv[])
70 {
71 	initialize_pre(&S);
72 
73 	if (options(&S, argc, argv))
74 		return (2);
75 
76 	initialize_post(&S);
77 
78 	if (S.m_check_if_sorted_only)
79 		check_if_sorted(&S);
80 
81 	if (!S.m_merge_only)
82 		internal_sort(&S);
83 
84 	merge(&S);
85 
86 	return (0);
87 }
88