xref: /illumos-gate/usr/src/cmd/sort/types.h (revision 101e15b5f8a77d9433805e541996abaabc9ca8c1)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*101e15b5SRichard Lowe 
27*101e15b5SRichard Lowe #ifndef	_SORT_TYPES_H
28*101e15b5SRichard Lowe #define	_SORT_TYPES_H
29*101e15b5SRichard Lowe 
30*101e15b5SRichard Lowe #ifdef	__cplusplus
31*101e15b5SRichard Lowe extern "C" {
32*101e15b5SRichard Lowe #endif
33*101e15b5SRichard Lowe 
34*101e15b5SRichard Lowe #include <sys/resource.h>
35*101e15b5SRichard Lowe #include <sys/types.h>
36*101e15b5SRichard Lowe #include <limits.h>
37*101e15b5SRichard Lowe #include <stdio.h>
38*101e15b5SRichard Lowe #include <stdlib.h>
39*101e15b5SRichard Lowe 
/*
 * flag_t is a plain int.  In this header it is the type of the option and
 * status members (for example f_options and s_status) as well as of the
 * individual on/off switches in sort_t.
 */
typedef	int flag_t;

/*
 * Comparison callback: receives two untyped operands and a flag_t, and
 * returns an int.
 * NOTE(review): presumably the result follows the usual <0 / 0 / >0 ordering
 * convention and the flag_t carries comparison options -- confirm against
 * the comparison routines, which are not visible in this header.
 */
typedef	int (*cmp_fcn_t)(void *, void *, flag_t);
43*101e15b5SRichard Lowe 
44*101e15b5SRichard Lowe typedef union vchar {
45*101e15b5SRichard Lowe 	char	sc;
46*101e15b5SRichard Lowe 	uchar_t	usc;
47*101e15b5SRichard Lowe 	wchar_t	wc;
48*101e15b5SRichard Lowe } vchar_t;
49*101e15b5SRichard Lowe 
50*101e15b5SRichard Lowe typedef union vcharptr {
51*101e15b5SRichard Lowe 	char	*sp;
52*101e15b5SRichard Lowe 	uchar_t	*usp;
53*101e15b5SRichard Lowe 	wchar_t *wp;
54*101e15b5SRichard Lowe } vcharptr_t;
55*101e15b5SRichard Lowe 
56*101e15b5SRichard Lowe typedef struct line_rec {
57*101e15b5SRichard Lowe 	vcharptr_t l_data;		/* raw data */
58*101e15b5SRichard Lowe 	vcharptr_t l_raw_collate;	/* collatable raw data */
59*101e15b5SRichard Lowe 	vcharptr_t l_collate;		/* key-ordered collatable string */
60*101e15b5SRichard Lowe 	ssize_t	l_data_length;
61*101e15b5SRichard Lowe 	ssize_t	l_collate_length;
62*101e15b5SRichard Lowe 	ssize_t	l_collate_bufsize;
63*101e15b5SRichard Lowe } line_rec_t;
64*101e15b5SRichard Lowe 
/*
 * Kind of key a field represents.  The enumerators take the default values
 * 0, 1 and 2 in declaration order.
 * NOTE(review): presumably these correspond to the default, month (-M) and
 * numeric (-n) key types of sort(1) -- confirm against the option parser.
 */
enum field_species {
	ALPHA,
	MONTH,
	NUMERIC
};
70*101e15b5SRichard Lowe 
/*
 * Per-field option bits.  Each macro is a distinct single bit, so the values
 * can be OR-ed together in one flag_t.
 * NOTE(review): presumably these are the bits kept in field_t.f_options and
 * sort_t.m_field_options, and map onto the sort(1) ordering modifiers
 * (-d, -f, -i, -b, -r) -- confirm against the option parser.
 */
#define	FIELD_DICTIONARY_ORDER		0x1
#define	FIELD_FOLD_UPPERCASE		0x2
#define	FIELD_IGNORE_NONPRINTABLES	0x4
#define	FIELD_IGNORE_BLANKS_START	0x8
#define	FIELD_IGNORE_BLANKS_END		0x10

#define	FIELD_REVERSE_COMPARISONS	0x20

#define	FIELD_MODIFIERS_DEFINED		0x40
80*101e15b5SRichard Lowe 
81*101e15b5SRichard Lowe typedef struct field {
82*101e15b5SRichard Lowe 	struct field		*f_next;
83*101e15b5SRichard Lowe 
84*101e15b5SRichard Lowe 	/*
85*101e15b5SRichard Lowe 	 * field ops vector
86*101e15b5SRichard Lowe 	 */
87*101e15b5SRichard Lowe 	ssize_t			(*f_convert)(struct field *, line_rec_t *,
88*101e15b5SRichard Lowe 	    vchar_t, ssize_t, ssize_t, ssize_t);
89*101e15b5SRichard Lowe 	enum field_species	f_species;
90*101e15b5SRichard Lowe 
91*101e15b5SRichard Lowe 	/*
92*101e15b5SRichard Lowe 	 * starting and ending fields, and offsets
93*101e15b5SRichard Lowe 	 */
94*101e15b5SRichard Lowe 	int			f_start_field;
95*101e15b5SRichard Lowe 	ssize_t			f_start_offset;
96*101e15b5SRichard Lowe 
97*101e15b5SRichard Lowe 	int			f_end_field;
98*101e15b5SRichard Lowe 	ssize_t			f_end_offset;
99*101e15b5SRichard Lowe 
100*101e15b5SRichard Lowe 	flag_t			f_options;
101*101e15b5SRichard Lowe } field_t;
102*101e15b5SRichard Lowe 
/*
 * Stream flag bits.
 *
 * STREAM_SOURCE_MASK (0x000f) covers exactly the four source bits
 * STREAM_ARRAY, STREAM_MMAP, STREAM_SINGLE and STREAM_WIDE; STREAM_NO_SOURCE
 * is the value with none of them set.
 */
#define	STREAM_SOURCE_MASK	0x000f
#define	STREAM_NO_SOURCE	0x0000
#define	STREAM_ARRAY		0x0001
#define	STREAM_MMAP		0x0002
#define	STREAM_SINGLE		0x0004
#define	STREAM_WIDE		0x0008

/*
 * The remaining flags each occupy their own bit above the source mask.
 * NOTE(review): presumably all of these are kept in stream_t.s_status --
 * confirm against the stream implementation.
 */
#define	STREAM_OPEN		0x0010
#define	STREAM_PRIMED		0x0020

#define	STREAM_OUTPUT		0x0040
#define	STREAM_EOS_REACHED	0x0080
#define	STREAM_NOTFILE		0x0100
#define	STREAM_UNIQUE		0x0200
#define	STREAM_INSTANT		0x0400
#define	STREAM_TEMPORARY	0x0800
#define	STREAM_NOT_FREEABLE	0x1000
120*101e15b5SRichard Lowe 
/*
 * Default sizes, expressed in terms of MEGABYTE.  MEGABYTE is not defined in
 * this header; it must be visible wherever these two macros are expanded.
 */
#define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
#define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)

/*
 * CHAR_AVG_LINE is a line-length figure of 32; WCHAR_AVG_LINE is the same
 * figure scaled by sizeof (wchar_t), so it has type size_t.
 * NOTE(review): presumably these are the assumed average line lengths used
 * for sizing estimates, and XFRM_MULTIPLIER a growth factor for transformed
 * (strxfrm-style) keys -- confirm at the use sites.
 */
#define	CHAR_AVG_LINE	32
#define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
#define	XFRM_MULTIPLIER	8
127*101e15b5SRichard Lowe 
/*
 * Result codes.  In both groups zero is the "complete"/"succeeded" value and
 * the non-zero values name the other outcomes.
 * NOTE(review): presumably NEXT_LINE_* is returned by the line-fetch
 * routines and PRIME_* by the sop_prime operations -- confirm at the call
 * sites, which are not visible in this header.
 */
#define	NEXT_LINE_COMPLETE	0x0
#define	NEXT_LINE_INCOMPLETE	0x1

#define	PRIME_SUCCEEDED		0x0
#define	PRIME_FAILED_EMPTY_FILE	0x1
#define	PRIME_FAILED		0x2
134*101e15b5SRichard Lowe 
135*101e15b5SRichard Lowe typedef struct stream_array {
136*101e15b5SRichard Lowe 	line_rec_t	**s_array;
137*101e15b5SRichard Lowe 	ssize_t		s_array_size;
138*101e15b5SRichard Lowe 	ssize_t		s_cur_index;
139*101e15b5SRichard Lowe } stream_array_t;
140*101e15b5SRichard Lowe 
141*101e15b5SRichard Lowe typedef struct stream_simple_file {
142*101e15b5SRichard Lowe 	/*
143*101e15b5SRichard Lowe 	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
144*101e15b5SRichard Lowe 	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
145*101e15b5SRichard Lowe 	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
146*101e15b5SRichard Lowe 	 */
147*101e15b5SRichard Lowe 	int		s_fd;			/* file descriptor */
148*101e15b5SRichard Lowe 	caddr_t		s_release_origin;	/* start for next madvise(3C) */
149*101e15b5SRichard Lowe } stream_simple_file_t;
150*101e15b5SRichard Lowe 
typedef struct stream_buffered_file {
	/*
	 * stream_buffered_file_t is used for files accessed via stdio, for
	 * both STREAM_SINGLE and STREAM_WIDE.  It is the BF member of
	 * stream_type_t.
	 *
	 * NOTE(review): this comment previously named STREAM_STDIO, which is
	 * not among the STREAM_* flags defined in this header; STREAM_SINGLE
	 * is presumably what was meant -- confirm.
	 */
	FILE		*s_fp;			/* file stream */
	void		*s_vbuf;		/* stdio alternate buffer */
	size_t		s_bytes_used;
} stream_buffered_file_t;
160*101e15b5SRichard Lowe 
161*101e15b5SRichard Lowe typedef union stream_type {
162*101e15b5SRichard Lowe 	stream_array_t		LA;	/* array of line records */
163*101e15b5SRichard Lowe 	stream_simple_file_t	SF;	/* file accessed via mmap */
164*101e15b5SRichard Lowe 	stream_buffered_file_t	BF;	/* file accessed via stdio */
165*101e15b5SRichard Lowe } stream_type_t;
166*101e15b5SRichard Lowe 
167*101e15b5SRichard Lowe struct stream;
168*101e15b5SRichard Lowe 
169*101e15b5SRichard Lowe typedef struct stream_ops {
170*101e15b5SRichard Lowe 	int	(*sop_is_closable)(struct stream *);
171*101e15b5SRichard Lowe 	int	(*sop_close)(struct stream *);
172*101e15b5SRichard Lowe 	int	(*sop_eos)(struct stream *);
173*101e15b5SRichard Lowe 	ssize_t	(*sop_fetch)(struct stream *);
174*101e15b5SRichard Lowe 	void	(*sop_flush)(struct stream *);
175*101e15b5SRichard Lowe 	int	(*sop_free)(struct stream *);
176*101e15b5SRichard Lowe 	int	(*sop_open_for_write)(struct stream *);
177*101e15b5SRichard Lowe 	int	(*sop_prime)(struct stream *);
178*101e15b5SRichard Lowe 	void	(*sop_put_line)(struct stream *, line_rec_t *);
179*101e15b5SRichard Lowe 	void	(*sop_release_line)(struct stream *);
180*101e15b5SRichard Lowe 	void	(*sop_send_eol)(struct stream *);
181*101e15b5SRichard Lowe 	int	(*sop_unlink)(struct stream *);
182*101e15b5SRichard Lowe } stream_ops_t;
183*101e15b5SRichard Lowe 
/*
 * Convenience wrappers that invoke an operation from a stream's s_ops vector
 * on that same stream.  Each expands to a single, fully parenthesized call
 * expression.
 *
 * The stream argument s is evaluated twice (once to locate the function
 * pointer, once as the call argument), so it must not have side effects.
 */
#define	SOP_IS_CLOSABLE(s)	(((s)->s_ops.sop_is_closable)(s))
#define	SOP_CLOSE(s)		(((s)->s_ops.sop_close)(s))
#define	SOP_EOS(s)		(((s)->s_ops.sop_eos)(s))
#define	SOP_FETCH(s)		(((s)->s_ops.sop_fetch)(s))
#define	SOP_FLUSH(s)		(((s)->s_ops.sop_flush)(s))
#define	SOP_FREE(s)		(((s)->s_ops.sop_free)(s))
#define	SOP_OPEN_FOR_WRITE(s)	(((s)->s_ops.sop_open_for_write)(s))
#define	SOP_PRIME(s)		(((s)->s_ops.sop_prime)(s))
#define	SOP_PUT_LINE(s, l)	(((s)->s_ops.sop_put_line)((s), (l)))
#define	SOP_RELEASE_LINE(s)	(((s)->s_ops.sop_release_line)(s))
#define	SOP_SEND_EOL(s)		(((s)->s_ops.sop_send_eol)(s))
#define	SOP_UNLINK(s)		(((s)->s_ops.sop_unlink)(s))
196*101e15b5SRichard Lowe 
197*101e15b5SRichard Lowe /*
198*101e15b5SRichard Lowe  * The stream_t type is provided to simplify access to files, particularly for
199*101e15b5SRichard Lowe  * external merges.
200*101e15b5SRichard Lowe  */
201*101e15b5SRichard Lowe typedef struct stream {
202*101e15b5SRichard Lowe 	struct stream	*s_consumer;	/* dependent on s_buffer */
203*101e15b5SRichard Lowe 	struct stream	*s_previous;
204*101e15b5SRichard Lowe 	struct stream	*s_next;
205*101e15b5SRichard Lowe 
206*101e15b5SRichard Lowe 	char		*s_filename;
207*101e15b5SRichard Lowe 
208*101e15b5SRichard Lowe 	line_rec_t	s_current;	/* present line buffers */
209*101e15b5SRichard Lowe 	stream_ops_t	s_ops;		/* type-specific ops vector */
210*101e15b5SRichard Lowe 	stream_type_t	s_type;		/* type-specific attributes */
211*101e15b5SRichard Lowe 
212*101e15b5SRichard Lowe 	void		*s_buffer;
213*101e15b5SRichard Lowe 	size_t		s_buffer_size;
214*101e15b5SRichard Lowe 	off_t		s_filesize;
215*101e15b5SRichard Lowe 	size_t		s_element_size;
216*101e15b5SRichard Lowe 	flag_t		s_status;	/* flags */
217*101e15b5SRichard Lowe 	ino_t		s_ino;
218*101e15b5SRichard Lowe 	dev_t		s_dev;
219*101e15b5SRichard Lowe } stream_t;
220*101e15b5SRichard Lowe 
221*101e15b5SRichard Lowe /*
222*101e15b5SRichard Lowe  * sort(1) has, for debugging purposes, a primitive compile-time option to
223*101e15b5SRichard Lowe  * generate statistics of various operations executed during an invocation.
224*101e15b5SRichard Lowe  * These statistics are recorded in the following sort_statistics_t structure.
225*101e15b5SRichard Lowe  */
226*101e15b5SRichard Lowe typedef struct sort_statistics {
227*101e15b5SRichard Lowe 	u_longlong_t	st_avail_mem;
228*101e15b5SRichard Lowe 	u_longlong_t	st_convert_reallocs;
229*101e15b5SRichard Lowe 	u_longlong_t	st_fetched_lines;
230*101e15b5SRichard Lowe 	u_longlong_t	st_insert_full_down;
231*101e15b5SRichard Lowe 	u_longlong_t	st_insert_full_input;
232*101e15b5SRichard Lowe 	u_longlong_t	st_insert_full_up;
233*101e15b5SRichard Lowe 	u_longlong_t	st_line_conversions;
234*101e15b5SRichard Lowe 	u_longlong_t	st_not_unique_lines;
235*101e15b5SRichard Lowe 	u_longlong_t	st_put_lines;
236*101e15b5SRichard Lowe 	u_longlong_t	st_put_temp_lines_internal;
237*101e15b5SRichard Lowe 	u_longlong_t	st_put_temp_lines_merge;
238*101e15b5SRichard Lowe 	u_longlong_t	st_put_unique_lines;
239*101e15b5SRichard Lowe 	u_longlong_t	st_shelved_lines;
240*101e15b5SRichard Lowe 	u_longlong_t	st_subfiles;		/* number of insertion sorts */
241*101e15b5SRichard Lowe 	u_longlong_t	st_swaps;
242*101e15b5SRichard Lowe 	u_longlong_t	st_tqs_calls;
243*101e15b5SRichard Lowe 
244*101e15b5SRichard Lowe 	uint_t		st_input_files;
245*101e15b5SRichard Lowe 	uint_t		st_merge_files;
246*101e15b5SRichard Lowe } sort_statistics_t;
247*101e15b5SRichard Lowe 
248*101e15b5SRichard Lowe typedef struct sort {
249*101e15b5SRichard Lowe 	stream_t	*m_input_streams;
250*101e15b5SRichard Lowe 	char		*m_output_filename;
251*101e15b5SRichard Lowe 
252*101e15b5SRichard Lowe 	stream_t	*m_temporary_streams;
253*101e15b5SRichard Lowe 	char		*m_tmpdir_template;
254*101e15b5SRichard Lowe 
255*101e15b5SRichard Lowe 	field_t		*m_fields_head;
256*101e15b5SRichard Lowe 
257*101e15b5SRichard Lowe 	cmp_fcn_t	m_compare_fn;
258*101e15b5SRichard Lowe 	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
259*101e15b5SRichard Lowe 	    vchar_t);
260*101e15b5SRichard Lowe 
261*101e15b5SRichard Lowe 	sort_statistics_t *m_stats;
262*101e15b5SRichard Lowe 	size_t		m_memory_limit;
263*101e15b5SRichard Lowe 	size_t		m_memory_available;
264*101e15b5SRichard Lowe 
265*101e15b5SRichard Lowe 	flag_t		m_check_if_sorted_only;
266*101e15b5SRichard Lowe 	flag_t		m_merge_only;
267*101e15b5SRichard Lowe 	flag_t		m_unique_lines;
268*101e15b5SRichard Lowe 	flag_t		m_entire_line;
269*101e15b5SRichard Lowe 
270*101e15b5SRichard Lowe 	enum field_species m_default_species;
271*101e15b5SRichard Lowe 	flag_t		m_field_options;
272*101e15b5SRichard Lowe 	vchar_t		m_field_separator;
273*101e15b5SRichard Lowe 
274*101e15b5SRichard Lowe 	flag_t		m_c_locale;
275*101e15b5SRichard Lowe 	flag_t		m_single_byte_locale;
276*101e15b5SRichard Lowe 	flag_t		m_input_from_stdin;
277*101e15b5SRichard Lowe 	flag_t		m_output_to_stdout;
278*101e15b5SRichard Lowe 	flag_t		m_verbose;
279*101e15b5SRichard Lowe } sort_t;
280*101e15b5SRichard Lowe 
281*101e15b5SRichard Lowe #ifdef	__cplusplus
282*101e15b5SRichard Lowe }
283*101e15b5SRichard Lowe #endif
284*101e15b5SRichard Lowe 
285*101e15b5SRichard Lowe #endif	/* _SORT_TYPES_H */
286