xref: /titanic_52/usr/src/cmd/sort/common/check.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "check.h"
30 
/*
 * Prefixes written to stderr ahead of the offending line when the check
 * fails.  The DEBUG variants carry a %llu conversion for a line counter.
 */
#ifdef DEBUG
#define	MSG_DISORDER		gettext("sort: disorder (%llu): ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique (%llu): ")
#else /* !DEBUG */
#define	MSG_DISORDER		gettext("sort: disorder: ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique: ")
#endif /* DEBUG */

/*
 * Flag bits accepted by fail_check():  the kind of failure being reported,
 * and whether the line data is held as wide characters.
 */
#define	CHECK_FAILURE_DISORDER	0x1
#define	CHECK_FAILURE_NONUNIQUE	0x2
#define	CHECK_WIDE		0x4
42 
43 static void
44 fail_check(line_rec_t *L, int flags, u_longlong_t lineno)
45 {
46 	char *line;
47 	ssize_t length;
48 
49 	if (flags & CHECK_WIDE) {
50 		if ((length = (ssize_t)wcstombs(NULL, L->l_data.wp, 0)) < 0)
51 			die(EMSG_ILLEGAL_CHAR);
52 
53 		/*
54 		 * +1 for null character
55 		 */
56 		line = alloca(length + 1);
57 		(void) wcstombs(line, L->l_data.wp, L->l_data_length);
58 		line[length] = '\0';
59 	} else {
60 		line = L->l_data.sp;
61 		length = L->l_data_length;
62 	}
63 
64 	if (flags & CHECK_FAILURE_DISORDER) {
65 		(void) fprintf(stderr, MSG_DISORDER, lineno);
66 		(void) write(fileno(stderr), line, length);
67 		(void) fprintf(stderr, "\n");
68 		return;
69 	}
70 
71 	(void) fprintf(stderr, MSG_NONUNIQUE);
72 	(void) write(fileno(stderr), line, length);
73 	(void) fprintf(stderr, "\n");
74 }
75 
76 static void
77 swap_coll_bufs(line_rec_t *A, line_rec_t *B)
78 {
79 	char *coll_buffer = B->l_collate.sp;
80 	ssize_t coll_bufsize = B->l_collate_bufsize;
81 
82 	safe_free(B->l_raw_collate.sp);
83 	copy_line_rec(A, B);
84 
85 	A->l_collate.sp = coll_buffer;
86 	A->l_collate_bufsize = coll_bufsize;
87 	A->l_raw_collate.sp = NULL;
88 }
89 
90 /*
91  * check_if_sorted() interacts with a stream in a slightly different way than a
92  * simple sort or a merge operation:  the check involves looking at two adjacent
93  * lines of the file and verifying that they are collated according to the key
94  * specifiers given.  For files accessed via mmap(), this is simply done as the
95  * entirety of the file is present in the address space.  For files accessed via
96  * stdio, regardless of locale, we must be able to guarantee that two lines are
97  * present in memory at once.  The basic buffer code for stdio does not make
98  * such a guarantee, so we use stream_swap_buffer() to alternate between two
99  * input buffers.
100  */
101 void
102 check_if_sorted(sort_t *S)
103 {
104 	size_t input_mem;
105 	int numerator, denominator;
106 
107 	char *data_buffer = NULL;
108 	size_t data_bufsize = 0;
109 	line_rec_t last_line;
110 	u_longlong_t lineno = 0;
111 	int r;
112 	int swap_required;
113 	flag_t coll_flags;
114 	stream_t *cur_streamp = S->m_input_streams;
115 
116 	ssize_t (*conversion_fcn)(field_t *, line_rec_t *, flag_t, vchar_t) =
117 	    field_convert;
118 	int (*collation_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t) =
119 	    collated;
120 
121 	set_memory_ratio(S, &numerator, &denominator);
122 
123 	if (stream_open_for_read(S, cur_streamp) > 1)
124 		die(EMSG_CHECK);
125 
126 	if (SOP_EOS(cur_streamp))
127 		exit(E_SUCCESS);
128 
129 	(void) memset(&last_line, 0, sizeof (line_rec_t));
130 
131 	/*
132 	 * We need to swap data buffers for the stream with each fetch, except
133 	 * on STREAM_MMAP (which are implicitly STREAM_SUSTAIN).
134 	 */
135 	swap_required = !(cur_streamp->s_status & STREAM_MMAP);
136 	if (swap_required) {
137 		stream_set(cur_streamp, STREAM_INSTANT);
138 		/*
139 		 * We use one half of the available memory for input, half for
140 		 * each buffer.  (The other half is left unreserved, in case
141 		 * conversions to collatable form require it.)
142 		 */
143 		input_mem = numerator * S->m_memory_available / denominator / 4;
144 
145 		stream_set_size(cur_streamp, input_mem);
146 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
147 		stream_set_size(cur_streamp, input_mem);
148 
149 		if (cur_streamp->s_status & STREAM_WIDE) {
150 			conversion_fcn = field_convert_wide;
151 			collation_fcn = collated_wide;
152 		}
153 	}
154 
155 	if (SOP_PRIME(cur_streamp) > 1)
156 		die(EMSG_CHECK);
157 
158 	if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
159 		coll_flags = COLL_REVERSE;
160 	else
161 		coll_flags = 0;
162 	if (S->m_unique_lines)
163 		coll_flags |= COLL_UNIQUE;
164 
165 	cur_streamp->s_current.l_collate_bufsize = INITIAL_COLLATION_SIZE
166 	    * cur_streamp->s_element_size;
167 	cur_streamp->s_current.l_collate.sp = safe_realloc(NULL,
168 	    cur_streamp->s_current.l_collate_bufsize);
169 	cur_streamp->s_current.l_raw_collate.sp = NULL;
170 
171 	last_line.l_collate_bufsize = INITIAL_COLLATION_SIZE *
172 	    cur_streamp->s_element_size;
173 	last_line.l_collate.sp = safe_realloc(NULL,
174 	    last_line.l_collate_bufsize);
175 	last_line.l_raw_collate.sp = NULL;
176 
177 	(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
178 	    FCV_REALLOC, S->m_field_separator);
179 
180 	swap_coll_bufs(&cur_streamp->s_current, &last_line);
181 	if (swap_required)
182 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
183 
184 	while (!SOP_EOS(cur_streamp)) {
185 		(void) SOP_FETCH(cur_streamp);
186 		lineno++;
187 
188 		(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
189 		    FCV_REALLOC, S->m_field_separator);
190 
191 		r = collation_fcn(&last_line, &cur_streamp->s_current, 0,
192 		    coll_flags);
193 
194 		if (r < 0 || (r == 0 && S->m_unique_lines == 0)) {
195 			swap_coll_bufs(&cur_streamp->s_current, &last_line);
196 			if (swap_required)
197 				stream_swap_buffer(cur_streamp, &data_buffer,
198 				    &data_bufsize);
199 			continue;
200 		}
201 
202 		if (r > 0) {
203 #ifndef	XPG4
204 			fail_check(&cur_streamp->s_current,
205 			    CHECK_FAILURE_DISORDER |
206 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
207 			    lineno);
208 #endif /* XPG4 */
209 			exit(E_FAILED_CHECK);
210 		}
211 
212 		if (r == 0 && S->m_unique_lines != 0) {
213 #ifndef	XPG4
214 			fail_check(&cur_streamp->s_current,
215 			    CHECK_FAILURE_NONUNIQUE |
216 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
217 			    lineno);
218 #endif /* XPG4 */
219 			exit(E_FAILED_CHECK);
220 		}
221 	}
222 
223 	exit(E_SUCCESS);
224 	/*NOTREACHED*/
225 }
226