xref: /titanic_50/usr/src/cmd/sort/common/types.h (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SORT_TYPES_H
28 #define	_SORT_TYPES_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 #include <sys/resource.h>
37 #include <sys/types.h>
38 #include <limits.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
/*
 * flag_t: a plain int.  Within this header it is the type of the option
 * word field_t.f_options, the status word stream_t.s_status, and the
 * individual flag_t settings in sort_t; it is also the third argument of
 * cmp_fcn_t.
 */
42 typedef	int flag_t;
43 
/*
 * cmp_fcn_t: pointer to a comparison routine that takes two untyped pointers
 * and a flag_t and returns int.  sort_t.m_compare_fn has this type.
 * NOTE(review): presumably the two pointers are line_rec_t pointers and the
 * result follows the usual negative/zero/positive convention -- confirm
 * against the comparison routines.
 */
44 typedef	int (*cmp_fcn_t)(void *, void *, flag_t);
45 
/*
 * vchar_t: one character, viewable as a char, a uchar_t, or a wchar_t.
 * The union carries no tag saying which member is valid.
 * NOTE(review): presumably the caller chooses the member from the locale
 * settings held in sort_t -- confirm.
 */
46 typedef union vchar {
47 	char	sc;	/* char view */
48 	uchar_t	usc;	/* uchar_t view */
49 	wchar_t	wc;	/* wchar_t view */
50 } vchar_t;
51 
/*
 * vcharptr_t: pointer counterpart of vchar_t -- one pointer, viewable as
 * char *, uchar_t *, or wchar_t *.  As with vchar_t, nothing in the union
 * records which view is in use.
 */
52 typedef union vcharptr {
53 	char	*sp;	/* char * view */
54 	uchar_t	*usp;	/* uchar_t * view */
55 	wchar_t *wp;	/* wchar_t * view */
56 } vcharptr_t;
57 
/*
 * line_rec_t: the record kept for one line -- pointers to its raw data and
 * to two collation forms, plus three signed sizes.
 * NOTE(review): whether the sizes count bytes or characters (they may differ
 * when the wchar_t view of vcharptr_t is in use) is not visible here --
 * confirm before doing arithmetic on them.
 */
58 typedef struct line_rec {
59 	vcharptr_t l_data;		/* raw data */
60 	vcharptr_t l_raw_collate;	/* collatable raw data */
61 	vcharptr_t l_collate;		/* key-ordered collatable string */
62 	ssize_t	l_data_length;		/* length associated with l_data */
63 	ssize_t	l_collate_length;	/* length associated with l_collate */
64 	ssize_t	l_collate_bufsize;	/* presumably capacity behind l_collate */
65 } line_rec_t;
66 
/*
 * field_species: the kind of a sort field.  Stored per field in
 * field_t.f_species and as the default in sort_t.m_default_species.
 * No explicit values are given, so ALPHA is 0, MONTH is 1, NUMERIC is 2.
 */
67 enum field_species {
68 	ALPHA,
69 	MONTH,
70 	NUMERIC
71 };
72 
/*
 * Field option bits.  Each value is a distinct single bit (0x1 .. 0x40), so
 * they can be OR-ed together in one flag_t.
 * NOTE(review): presumably these are the bits held in field_t.f_options and
 * sort_t.m_field_options -- confirm against the option parser.
 */
73 #define	FIELD_DICTIONARY_ORDER		0x1
74 #define	FIELD_FOLD_UPPERCASE		0x2
75 #define	FIELD_IGNORE_NONPRINTABLES	0x4
76 #define	FIELD_IGNORE_BLANKS_START	0x8
77 #define	FIELD_IGNORE_BLANKS_END		0x10
78 
79 #define	FIELD_REVERSE_COMPARISONS	0x20
80 
81 #define	FIELD_MODIFIERS_DEFINED		0x40
82 
/*
 * field_t: description of one sort field.  Fields are chained through
 * f_next; sort_t.m_fields_head points at a field_t.
 */
83 typedef struct field {
84 	struct field		*f_next;
85 
86 	/*
87 	 * field ops vector
	 *
	 * f_convert receives the field itself, a line record, a vchar_t and
	 * three ssize_t values, and returns an ssize_t.
	 * NOTE(review): the meaning of the vchar_t and ssize_t arguments and
	 * of the return value is not visible in this header -- see the
	 * conversion routines.
88 	 */
89 	ssize_t			(*f_convert)(struct field *, line_rec_t *,
90 	    vchar_t, ssize_t, ssize_t, ssize_t);
91 	enum field_species	f_species;
92 
93 	/*
94 	 * starting and ending fields, and offsets
95 	 */
96 	int			f_start_field;
97 	ssize_t			f_start_offset;
98 
99 	int			f_end_field;
100 	ssize_t			f_end_offset;
101 
102 	flag_t			f_options;	/* presumably FIELD_* bits */
103 } field_t;
104 
/*
 * Stream flag bits.
 *
 * STREAM_SOURCE_MASK (0x000f) covers exactly the four source bits
 * STREAM_ARRAY, STREAM_MMAP, STREAM_SINGLE and STREAM_WIDE;
 * STREAM_NO_SOURCE (0) is the masked value with none of them set.
 * The remaining flags are distinct single bits from 0x0010 to 0x1000.
 * NOTE(review): presumably all of these live in stream_t.s_status --
 * confirm against the stream code.
 */
105 #define	STREAM_SOURCE_MASK	0x000f
106 #define	STREAM_NO_SOURCE	0x0000
107 #define	STREAM_ARRAY		0x0001
108 #define	STREAM_MMAP		0x0002
109 #define	STREAM_SINGLE		0x0004
110 #define	STREAM_WIDE		0x0008
111 
112 #define	STREAM_OPEN		0x0010
113 #define	STREAM_PRIMED		0x0020
114 
115 #define	STREAM_OUTPUT		0x0040
116 #define	STREAM_EOS_REACHED	0x0080
117 #define	STREAM_NOTFILE		0x0100
118 #define	STREAM_UNIQUE		0x0200
119 #define	STREAM_INSTANT		0x0400
120 #define	STREAM_TEMPORARY	0x0800
121 #define	STREAM_NOT_FREEABLE	0x1000
122 
/*
 * Size defaults.  MEGABYTE is not defined in this header; it must be in
 * scope wherever DEFAULT_INPUT_SIZE or DEFAULT_RELEASE_SIZE is expanded.
 * WCHAR_AVG_LINE is CHAR_AVG_LINE scaled by sizeof (wchar_t), so its type
 * is size_t rather than int.
 */
123 #define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
124 #define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)
125 
126 #define	CHAR_AVG_LINE	32
127 #define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
128 #define	XFRM_MULTIPLIER	8
129 
/*
 * Result codes.  NOTE(review): judging by the names, the NEXT_LINE_* values
 * are presumably results of fetching a line and the PRIME_* values results
 * of priming a stream (cf. sop_fetch / sop_prime in stream_ops_t) --
 * confirm against the stream implementations.
 */
130 #define	NEXT_LINE_COMPLETE	0x0
131 #define	NEXT_LINE_INCOMPLETE	0x1
132 
133 #define	PRIME_SUCCEEDED		0x0
134 #define	PRIME_FAILED_EMPTY_FILE	0x1
135 #define	PRIME_FAILED		0x2
136 
/*
 * stream_array_t: type-specific state for a stream backed by an array of
 * line record pointers (the LA member of stream_type_t).
 */
137 typedef struct stream_array {
138 	line_rec_t	**s_array;	/* array of line record pointers */
139 	ssize_t		s_array_size;	/* size of s_array */
140 	ssize_t		s_cur_index;	/* current position in s_array */
141 } stream_array_t;
142 
/*
 * stream_simple_file_t: type-specific state for a stream accessed through a
 * file descriptor (the SF member of stream_type_t).
 */
143 typedef struct stream_simple_file {
144 	/*
145 	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
146 	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
147 	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
148 	 */
149 	int		s_fd;			/* file descriptor */
150 	caddr_t		s_release_origin;	/* start for next madvise(3C) */
151 } stream_simple_file_t;
152 
/*
 * stream_buffered_file_t: type-specific state for a stream accessed through
 * stdio (the BF member of stream_type_t).
 */
153 typedef struct stream_buffered_file {
154 	/*
155 	 * stream_buffered_file_t is used for both STREAM_STDIO and
156 	 * STREAM_WIDE.
	 *
	 * NOTE(review): this header defines no STREAM_STDIO flag; the
	 * single-byte stdio case is presumably STREAM_SINGLE -- confirm and
	 * correct the name above.
157 	 */
158 	FILE		*s_fp;			/* file stream */
159 	void		*s_vbuf;		/* stdio alternate buffer */
160 	size_t		s_bytes_used;		/* byte count; exact scope not shown here */
161 } stream_buffered_file_t;
162 
/*
 * stream_type_t: overlay of the per-type stream state; stream_t.s_type holds
 * one of these.  The union itself has no discriminator.
 * NOTE(review): presumably the source bits in stream_t.s_status say which
 * member is live -- confirm.
 */
163 typedef union stream_type {
164 	stream_array_t		LA;	/* array of line records */
165 	stream_simple_file_t	SF;	/* file accessed via mmap */
166 	stream_buffered_file_t	BF;	/* file accessed via stdio */
167 } stream_type_t;
168 
/*
 * Forward declaration: the operations below take a struct stream pointer,
 * and struct stream in turn embeds a stream_ops_t.
 */
169 struct stream;
170 
/*
 * stream_ops_t: per-stream table of function pointers.  Every entry takes
 * the stream as its first argument; sop_put_line additionally takes the line
 * record to write.  sop_fetch returns ssize_t, sop_flush, sop_put_line,
 * sop_release_line and sop_send_eol return nothing, and the rest return int.
 * The SOP_*() macros in this header call through this table.
 */
171 typedef struct stream_ops {
172 	int	(*sop_is_closable)(struct stream *);
173 	int	(*sop_close)(struct stream *);
174 	int	(*sop_eos)(struct stream *);
175 	ssize_t	(*sop_fetch)(struct stream *);
176 	void	(*sop_flush)(struct stream *);
177 	int	(*sop_free)(struct stream *);
178 	int	(*sop_open_for_write)(struct stream *);
179 	int	(*sop_prime)(struct stream *);
180 	void	(*sop_put_line)(struct stream *, line_rec_t *);
181 	void	(*sop_release_line)(struct stream *);
182 	void	(*sop_send_eol)(struct stream *);
183 	int	(*sop_unlink)(struct stream *);
184 } stream_ops_t;
185 
/*
 * Dispatch wrappers: SOP_xxx(s) calls the sop_xxx entry of (s)->s_ops with s
 * itself as the stream argument (SOP_PUT_LINE also forwards the line l).
 * The argument s is evaluated twice per use, so it must not have side
 * effects.
 */
186 #define	SOP_IS_CLOSABLE(s)	((*(s)->s_ops.sop_is_closable)(s))
187 #define	SOP_CLOSE(s)		((*(s)->s_ops.sop_close)(s))
188 #define	SOP_EOS(s)		((*(s)->s_ops.sop_eos)(s))
189 #define	SOP_FETCH(s)		((*(s)->s_ops.sop_fetch)(s))
190 #define	SOP_FLUSH(s)		((*(s)->s_ops.sop_flush)(s))
191 #define	SOP_FREE(s)		((*(s)->s_ops.sop_free)(s))
192 #define	SOP_OPEN_FOR_WRITE(s)	((*(s)->s_ops.sop_open_for_write)(s))
193 #define	SOP_PRIME(s)		((*(s)->s_ops.sop_prime)(s))
194 #define	SOP_PUT_LINE(s, l)	((*(s)->s_ops.sop_put_line)(s, l))
195 #define	SOP_RELEASE_LINE(s)	((*(s)->s_ops.sop_release_line)(s))
196 #define	SOP_SEND_EOL(s)		((*(s)->s_ops.sop_send_eol)(s))
197 #define	SOP_UNLINK(s)		((*(s)->s_ops.sop_unlink)(s))
198 
199 /*
200  * The stream_t type is provided to simplify access to files, particularly for
201  * external merges.
 *
 * Each stream carries its own operations table (s_ops) and type-specific
 * state (s_type) by value, and has s_previous / s_next pointers to other
 * streams.
202  */
203 typedef struct stream {
204 	struct stream	*s_consumer;	/* dependent on s_buffer */
205 	struct stream	*s_previous;
206 	struct stream	*s_next;
207 
208 	char		*s_filename;
209 
210 	line_rec_t	s_current;	/* present line buffers */
211 	stream_ops_t	s_ops;		/* type-specific ops vector */
212 	stream_type_t	s_type;		/* type-specific attributes */
213 
214 	void		*s_buffer;
215 	size_t		s_buffer_size;	/* presumably size of s_buffer */
216 	off_t		s_filesize;
217 	size_t		s_element_size;
218 	flag_t		s_status;	/* flags */
219 	ino_t		s_ino;
220 	dev_t		s_dev;
221 } stream_t;
222 
223 /*
224  * sort(1) has, for debugging purposes, a primitive compile-time option to
225  * generate statistics of various operations executed during an invocation.
226  * These statistics are recorded in the following sort_statistics_t structure.
 *
 * The first sixteen members are u_longlong_t; the two file counts at the end
 * are uint_t.  sort_t.m_stats points at one of these structures.
227  */
228 typedef struct sort_statistics {
229 	u_longlong_t	st_avail_mem;
230 	u_longlong_t	st_convert_reallocs;
231 	u_longlong_t	st_fetched_lines;
232 	u_longlong_t	st_insert_full_down;
233 	u_longlong_t	st_insert_full_input;
234 	u_longlong_t	st_insert_full_up;
235 	u_longlong_t	st_line_conversions;
236 	u_longlong_t	st_not_unique_lines;
237 	u_longlong_t	st_put_lines;
238 	u_longlong_t	st_put_temp_lines_internal;
239 	u_longlong_t	st_put_temp_lines_merge;
240 	u_longlong_t	st_put_unique_lines;
241 	u_longlong_t	st_shelved_lines;
242 	u_longlong_t	st_subfiles;		/* number of insertion sorts */
243 	u_longlong_t	st_swaps;
244 	u_longlong_t	st_tqs_calls;
245 
246 	uint_t		st_input_files;
247 	uint_t		st_merge_files;
248 } sort_statistics_t;
249 
/*
 * sort_t: the state of one sort invocation gathered in a single structure --
 * pointers to input and temporary streams, the output filename, the field
 * list, the comparison and conversion routines, memory figures, and a set
 * of flag_t settings.
 * NOTE(review): the flag_t members are presumably used as booleans set from
 * command-line options and locale detection -- confirm against the option
 * parser.
 */
250 typedef struct sort {
251 	stream_t	*m_input_streams;
252 	char		*m_output_filename;
253 
254 	stream_t	*m_temporary_streams;
255 	char		*m_tmpdir_template;
256 
257 	field_t		*m_fields_head;		/* first field_t; see f_next */
258 
259 	cmp_fcn_t	m_compare_fn;
	/*
	 * Conversion routine taking a field, a line record, a flag_t and a
	 * vchar_t, and returning ssize_t.  Its signature differs from
	 * field_t.f_convert.
	 */
260 	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
261 	    vchar_t);
262 
263 	sort_statistics_t *m_stats;
264 	size_t		m_memory_limit;
265 	size_t		m_memory_available;
266 
267 	flag_t		m_check_if_sorted_only;
268 	flag_t		m_merge_only;
269 	flag_t		m_unique_lines;
270 	flag_t		m_entire_line;
271 
272 	enum field_species m_default_species;
273 	flag_t		m_field_options;	/* presumably FIELD_* bits */
274 	vchar_t		m_field_separator;
275 
276 	flag_t		m_c_locale;
277 	flag_t		m_single_byte_locale;
278 	flag_t		m_input_from_stdin;
279 	flag_t		m_output_to_stdout;
280 	flag_t		m_verbose;
281 } sort_t;
282 
283 #ifdef	__cplusplus
284 }
285 #endif
286 
287 #endif	/* _SORT_TYPES_H */
288