xref: /illumos-gate/usr/src/cmd/sort/types.h (revision 101e15b5f8a77d9433805e541996abaabc9ca8c1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SORT_TYPES_H
28 #define	_SORT_TYPES_H
29 
30 #ifdef	__cplusplus
31 extern "C" {
32 #endif
33 
34 #include <sys/resource.h>
35 #include <sys/types.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 
/*
 * Integer type used in this header for flag-valued members, e.g.
 * field_t.f_options, stream_t.s_status and the m_* switches in sort_t.
 */
40 typedef	int flag_t;
41 
/*
 * Pointer to a comparison routine taking two untyped pointers and a flag_t
 * and returning int.  sort_t.m_compare_fn holds one of these.
 * NOTE(review): presumably the pointers are line_rec_t * and the result is
 * negative/zero/positive in the usual way -- confirm against the
 * implementations, which are not visible in this header.
 */
42 typedef	int (*cmp_fcn_t)(void *, void *, flag_t);
43 
/*
 * One character in any of three representations.  The members share
 * storage, so the user must know which representation is in effect.
 */
44 typedef union vchar {
45 	char	sc;	/* as a plain char */
46 	uchar_t	usc;	/* as an unsigned char */
47 	wchar_t	wc;	/* as a wide character */
48 } vchar_t;
49 
/*
 * Pointer counterpart of vchar_t: a single pointer that may be read as
 * char *, uchar_t * or wchar_t *.  The members share storage.
 */
50 typedef union vcharptr {
51 	char	*sp;	/* as pointer to plain char */
52 	uchar_t	*usp;	/* as pointer to unsigned char */
53 	wchar_t *wp;	/* as pointer to wide character */
54 } vcharptr_t;
55 
/*
 * Record describing one line: pointers to its raw data and to its
 * collation strings, plus three size fields.
 * NOTE(review): the units of the ssize_t members (bytes vs. characters) and
 * the ownership of the pointed-to buffers are not visible in this header.
 */
56 typedef struct line_rec {
57 	vcharptr_t l_data;		/* raw data */
58 	vcharptr_t l_raw_collate;	/* collatable raw data */
59 	vcharptr_t l_collate;		/* key-ordered collatable string */
60 	ssize_t	l_data_length;		/* presumably length of l_data */
61 	ssize_t	l_collate_length;	/* presumably length of l_collate */
62 	ssize_t	l_collate_bufsize;	/* presumably capacity of l_collate */
63 } line_rec_t;
64 
/*
 * Kind of key a field is.  Stored in field_t.f_species and, as the default,
 * in sort_t.m_default_species.
 * NOTE(review): presumably these correspond to sort(1)'s default ordering,
 * -M and -n respectively -- confirm against the option parser.
 */
65 enum field_species {
66 	ALPHA,
67 	MONTH,
68 	NUMERIC
69 };
70 
/*
 * Field option bits.  Each value is a distinct single bit, so they may be
 * OR'ed together in a flag_t.
 * NOTE(review): presumably kept in field_t.f_options and
 * sort_t.m_field_options, and presumably set by sort(1)'s -d, -f, -i, -b and
 * -r modifiers -- confirm against the option parser.
 */
71 #define	FIELD_DICTIONARY_ORDER		0x1
72 #define	FIELD_FOLD_UPPERCASE		0x2
73 #define	FIELD_IGNORE_NONPRINTABLES	0x4
74 #define	FIELD_IGNORE_BLANKS_START	0x8
75 #define	FIELD_IGNORE_BLANKS_END		0x10
76 
77 #define	FIELD_REVERSE_COMPARISONS	0x20
78 
79 #define	FIELD_MODIFIERS_DEFINED		0x40
80 
/*
 * Description of one sort key field.  Fields are chained through f_next;
 * sort_t.m_fields_head points at a field_t.
 */
81 typedef struct field {
82 	struct field		*f_next;	/* next field in the chain */
83 
84 	/*
85 	 * field ops vector
86 	 *
	 * f_convert receives this field, a line record, a vchar_t and three
	 * ssize_t values, and returns ssize_t.
	 * NOTE(review): the meaning of the vchar_t and ssize_t arguments and
	 * of the return value is not visible in this header.
87 	 */
87 	ssize_t			(*f_convert)(struct field *, line_rec_t *,
88 	    vchar_t, ssize_t, ssize_t, ssize_t);
89 	enum field_species	f_species;	/* ALPHA, MONTH or NUMERIC */
90 
91 	/*
92 	 * starting and ending fields, and offsets
93 	 */
94 	int			f_start_field;
95 	ssize_t			f_start_offset;
96 
97 	int			f_end_field;
98 	ssize_t			f_end_offset;
99 
100 	flag_t			f_options;	/* presumably FIELD_* bits */
101 } field_t;
102 
/*
 * Stream status bits; stream_t.s_status is the flag_t described as "flags".
 *
 * The low four bits name the stream's source: STREAM_SOURCE_MASK (0x000f)
 * is exactly STREAM_ARRAY | STREAM_MMAP | STREAM_SINGLE | STREAM_WIDE, and
 * STREAM_NO_SOURCE is the value with none of them set.  The remaining
 * values are independent single-bit flags.
 */
103 #define	STREAM_SOURCE_MASK	0x000f
104 #define	STREAM_NO_SOURCE	0x0000
105 #define	STREAM_ARRAY		0x0001
106 #define	STREAM_MMAP		0x0002
107 #define	STREAM_SINGLE		0x0004
108 #define	STREAM_WIDE		0x0008
109 
110 #define	STREAM_OPEN		0x0010
111 #define	STREAM_PRIMED		0x0020
112 
113 #define	STREAM_OUTPUT		0x0040
114 #define	STREAM_EOS_REACHED	0x0080
115 #define	STREAM_NOTFILE		0x0100
116 #define	STREAM_UNIQUE		0x0200
117 #define	STREAM_INSTANT		0x0400
118 #define	STREAM_TEMPORARY	0x0800
119 #define	STREAM_NOT_FREEABLE	0x1000
120 
/*
 * Sizing constants.
 *
 * MEGABYTE is not defined in this header; it must be defined elsewhere
 * before DEFAULT_INPUT_SIZE or DEFAULT_RELEASE_SIZE is expanded.
 *
 * WCHAR_AVG_LINE is CHAR_AVG_LINE scaled by sizeof (wchar_t), so it has
 * type size_t rather than int.
 * NOTE(review): presumably the *_AVG_LINE values are average-line-length
 * estimates and XFRM_MULTIPLIER is a growth factor for strxfrm(3C)-style
 * output -- confirm at the points of use.
 */
121 #define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
122 #define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)
123 
124 #define	CHAR_AVG_LINE	32
125 #define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
126 #define	XFRM_MULTIPLIER	8
127 
/*
 * Two small sets of result codes; in each set zero is the successful or
 * complete case.
 * NOTE(review): presumably the PRIME_* values are what sop_prime returns and
 * the NEXT_LINE_* values report whether a fetched line was read in full --
 * confirm against the stream implementations.
 */
128 #define	NEXT_LINE_COMPLETE	0x0
129 #define	NEXT_LINE_INCOMPLETE	0x1
130 
131 #define	PRIME_SUCCEEDED		0x0
132 #define	PRIME_FAILED_EMPTY_FILE	0x1
133 #define	PRIME_FAILED		0x2
134 
/*
 * Type-specific state for a stream backed by an array of line record
 * pointers (the LA member of stream_type_t).
 */
135 typedef struct stream_array {
136 	line_rec_t	**s_array;	/* array of pointers to line records */
137 	ssize_t		s_array_size;	/* presumably element count of s_array */
138 	ssize_t		s_cur_index;	/* presumably current position in s_array */
139 } stream_array_t;
140 
/*
 * Type-specific state for a stream accessed through a file descriptor
 * (the SF member of stream_type_t).
 */
141 typedef struct stream_simple_file {
142 	/*
143 	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
144 	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
145 	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
146 	 */
147 	int		s_fd;			/* file descriptor */
148 	caddr_t		s_release_origin;	/* start for next madvise(3C) */
149 } stream_simple_file_t;
150 
/*
 * Type-specific state for a stream accessed through stdio (the BF member
 * of stream_type_t).
 */
151 typedef struct stream_buffered_file {
152 	/*
153 	 * stream_buffered_file_t is used for both STREAM_STDIO and
154 	 * STREAM_WIDE.
	 *
	 * NOTE(review): STREAM_STDIO is not among the STREAM_* flags defined
	 * in this header; presumably STREAM_SINGLE is meant -- confirm and
	 * correct the name.
155 	 */
156 	FILE		*s_fp;			/* file stream */
157 	void		*s_vbuf;		/* stdio alternate buffer */
158 	size_t		s_bytes_used;
159 } stream_buffered_file_t;
160 
/*
 * Type-specific attributes of a stream (stream_t.s_type).  The members
 * share storage, so only the one matching the stream's kind is meaningful.
 * Note that stream_simple_file_t's own comment says it is also used for
 * STREAM_OUTPUT streams, not only for mmap-accessed input.
 */
161 typedef union stream_type {
162 	stream_array_t		LA;	/* array of line records */
163 	stream_simple_file_t	SF;	/* file accessed via mmap */
164 	stream_buffered_file_t	BF;	/* file accessed via stdio */
165 } stream_type_t;
166 
/*
 * Forward declaration so that the ops vector below can name struct stream
 * before the full definition appears.
 */
167 struct stream;
168 
/*
 * Per-stream operations vector (stream_t.s_ops).  Every operation takes the
 * stream it acts on; sop_put_line additionally takes the line record to be
 * written.  sop_fetch returns ssize_t; sop_flush, sop_put_line,
 * sop_release_line and sop_send_eol return nothing; the rest return int.
 * NOTE(review): the meaning of the int and ssize_t return values is not
 * visible in this header -- see the individual implementations.
 */
169 typedef struct stream_ops {
170 	int	(*sop_is_closable)(struct stream *);
171 	int	(*sop_close)(struct stream *);
172 	int	(*sop_eos)(struct stream *);
173 	ssize_t	(*sop_fetch)(struct stream *);
174 	void	(*sop_flush)(struct stream *);
175 	int	(*sop_free)(struct stream *);
176 	int	(*sop_open_for_write)(struct stream *);
177 	int	(*sop_prime)(struct stream *);
178 	void	(*sop_put_line)(struct stream *, line_rec_t *);
179 	void	(*sop_release_line)(struct stream *);
180 	void	(*sop_send_eol)(struct stream *);
181 	int	(*sop_unlink)(struct stream *);
182 } stream_ops_t;
183 
/*
 * Dispatch wrappers: each macro calls the correspondingly named member of
 * the stream's own s_ops vector, passing the stream itself as the first
 * argument.
 *
 * The argument s appears twice in every expansion (once to find the
 * function pointer, once as the call argument), so it is evaluated twice;
 * do not pass an expression with side effects.
 */
184 #define	SOP_IS_CLOSABLE(s)	((s)->s_ops.sop_is_closable)(s)
185 #define	SOP_CLOSE(s)		((s)->s_ops.sop_close)(s)
186 #define	SOP_EOS(s)		((s)->s_ops.sop_eos)(s)
187 #define	SOP_FETCH(s)		((s)->s_ops.sop_fetch)(s)
188 #define	SOP_FLUSH(s)		((s)->s_ops.sop_flush)(s)
189 #define	SOP_FREE(s)		((s)->s_ops.sop_free)(s)
190 #define	SOP_OPEN_FOR_WRITE(s)	((s)->s_ops.sop_open_for_write)(s)
191 #define	SOP_PRIME(s)		((s)->s_ops.sop_prime)(s)
192 #define	SOP_PUT_LINE(s, l)	((s)->s_ops.sop_put_line)(s, l)
193 #define	SOP_RELEASE_LINE(s)	((s)->s_ops.sop_release_line)(s)
194 #define	SOP_SEND_EOL(s)		((s)->s_ops.sop_send_eol)(s)
195 #define	SOP_UNLINK(s)		((s)->s_ops.sop_unlink)(s)
196 
197 /*
198  * The stream_t type is provided to simplify access to files, particularly for
199  * external merges.
 *
 * A stream carries its current line record, an ops vector (normally invoked
 * through the SOP_* macros) and a union of type-specific attributes, along
 * with links to other streams.
200  */
201 typedef struct stream {
202 	struct stream	*s_consumer;	/* dependent on s_buffer */
203 	struct stream	*s_previous;	/* link to another stream */
204 	struct stream	*s_next;	/* link to another stream */
205 
206 	char		*s_filename;
207 
208 	line_rec_t	s_current;	/* present line buffers */
209 	stream_ops_t	s_ops;		/* type-specific ops vector */
210 	stream_type_t	s_type;		/* type-specific attributes */
211 
212 	void		*s_buffer;
213 	size_t		s_buffer_size;	/* presumably size of s_buffer */
214 	off_t		s_filesize;
215 	size_t		s_element_size;
216 	flag_t		s_status;	/* flags */
217 	ino_t		s_ino;		/* presumably inode of the file */
218 	dev_t		s_dev;		/* presumably device of the file */
219 } stream_t;
220 
221 /*
222  * sort(1) has, for debugging purposes, a primitive compile-time option to
223  * generate statistics of various operations executed during an invocation.
224  * These statistics are recorded in the following sort_statistics_t structure.
 *
 * All members are unsigned: the first group is u_longlong_t and the two
 * file counts are uint_t.  sort_t.m_stats points at one of these.
225  */
226 typedef struct sort_statistics {
227 	u_longlong_t	st_avail_mem;
228 	u_longlong_t	st_convert_reallocs;
229 	u_longlong_t	st_fetched_lines;
230 	u_longlong_t	st_insert_full_down;
231 	u_longlong_t	st_insert_full_input;
232 	u_longlong_t	st_insert_full_up;
233 	u_longlong_t	st_line_conversions;
234 	u_longlong_t	st_not_unique_lines;
235 	u_longlong_t	st_put_lines;
236 	u_longlong_t	st_put_temp_lines_internal;
237 	u_longlong_t	st_put_temp_lines_merge;
238 	u_longlong_t	st_put_unique_lines;
239 	u_longlong_t	st_shelved_lines;
240 	u_longlong_t	st_subfiles;		/* number of insertion sorts */
241 	u_longlong_t	st_swaps;
242 	u_longlong_t	st_tqs_calls;
243 
244 	uint_t		st_input_files;
245 	uint_t		st_merge_files;
246 } sort_statistics_t;
247 
/*
 * State for one sort: input and temporary streams, output and temporary
 * file naming, the field list, comparison and conversion routines, memory
 * figures and a set of flag_t switches.
 * NOTE(review): the flag_t members below are presumably simple on/off
 * switches set from the command line and locale -- confirm against the
 * option parser.
 */
248 typedef struct sort {
249 	stream_t	*m_input_streams;
250 	char		*m_output_filename;
251 
252 	stream_t	*m_temporary_streams;
253 	char		*m_tmpdir_template;
254 
255 	field_t		*m_fields_head;	/* fields chain via f_next */
256 
257 	cmp_fcn_t	m_compare_fn;
	/*
	 * Note that this signature differs from field_t.f_convert: here the
	 * flag_t precedes the vchar_t and there are no ssize_t arguments.
	 */
258 	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
259 	    vchar_t);
260 
261 	sort_statistics_t *m_stats;
262 	size_t		m_memory_limit;
263 	size_t		m_memory_available;
264 
265 	flag_t		m_check_if_sorted_only;
266 	flag_t		m_merge_only;
267 	flag_t		m_unique_lines;
268 	flag_t		m_entire_line;
269 
270 	enum field_species m_default_species;
271 	flag_t		m_field_options;	/* presumably FIELD_* bits */
272 	vchar_t		m_field_separator;
273 
274 	flag_t		m_c_locale;
275 	flag_t		m_single_byte_locale;
276 	flag_t		m_input_from_stdin;
277 	flag_t		m_output_to_stdout;
278 	flag_t		m_verbose;
279 } sort_t;
280 
281 #ifdef	__cplusplus
282 }
283 #endif
284 
285 #endif	/* _SORT_TYPES_H */
286