xref: /illumos-gate/usr/src/cmd/sort/streams_wide.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include "streams_wide.h"
27 #include "streams_common.h"
28 
/*
 * Size, in bytes, of the buffer that stream_wide_prime() maps on demand
 * for a STREAM_INSTANT stream that has not yet been given a buffer.
 */
#define	WIDE_VBUF_SIZE	(64 * KILOBYTE)

/*
 * The "shelf" records whether the most recent fetch left a partial line
 * behind:  stream_wide_fetch() marks it occupied when it reads data that
 * does not end in a newline before end-of-file, and vacant once a complete
 * line has been read.  There is a single shelf for this whole file.
 */
#define	SHELF_VACANT	0
#define	SHELF_OCCUPIED	1
static int shelf = SHELF_VACANT;
34 
35 /*
36  * Wide character streams implementation
37  *
38  *   The wide character streams implementation is, for the most part, a
39  *   reimplementation of the stdio streams implementation, using wide character
40  *   string routines.  However, fgetws(3C) retains the newline that fgets(3C)
41  *   discards while reading a complete line.  As a result, the wide character
42  *   routines need to guard against coincidental exhaustion of the buffer, as
43  *   well as overwriting the end-of-line character and correcting the
44  *   l_data_length field.
45  */
46 
47 static int
48 stream_wide_prime(stream_t *str)
49 {
50 	stream_buffered_file_t *BF = &(str->s_type.BF);
51 	wchar_t *current_position;
52 	wchar_t *end_of_buffer;
53 	wchar_t *next_nl;
54 
55 	ASSERT(!(str->s_status & STREAM_OUTPUT));
56 	ASSERT(str->s_status & STREAM_OPEN);
57 
58 	if (str->s_status & STREAM_INSTANT && (str->s_buffer == NULL)) {
59 		str->s_buffer = xzmap(0, WIDE_VBUF_SIZE, PROT_READ |
60 		    PROT_WRITE, MAP_PRIVATE, 0);
61 		if (str->s_buffer == MAP_FAILED)
62 			die(EMSG_MMAP);
63 		str->s_buffer_size = WIDE_VBUF_SIZE;
64 	}
65 
66 	ASSERT(str->s_buffer != NULL);
67 
68 	if (stream_is_primed(str)) {
69 		int shelf_state = shelf;
70 
71 		ASSERT(str->s_current.l_data_length >= -1);
72 		(void) memcpy(str->s_buffer, str->s_current.l_data.wp,
73 		    (str->s_current.l_data_length + 1) * sizeof (wchar_t));
74 		str->s_current.l_data.wp = str->s_buffer;
75 
76 		if ((str->s_current.l_data_length == -1 ||
77 		    shelf_state == SHELF_OCCUPIED ||
78 		    *(str->s_current.l_data.wp +
79 		    str->s_current.l_data_length) != L'\0') &&
80 		    SOP_FETCH(str) == NEXT_LINE_INCOMPLETE &&
81 		    shelf_state == SHELF_OCCUPIED)
82 			die(EMSG_MEMORY);
83 
84 		return (PRIME_SUCCEEDED);
85 	}
86 
87 	stream_set(str, STREAM_PRIMED);
88 
89 	current_position = (wchar_t *)str->s_buffer;
90 	/*LINTED ALIGNMENT*/
91 	end_of_buffer = (wchar_t *)((char *)str->s_buffer +
92 	    str->s_buffer_size);
93 
94 	trip_eof(BF->s_fp);
95 	if (!feof(BF->s_fp))
96 		(void) fgetws(current_position, end_of_buffer
97 		    - current_position, BF->s_fp);
98 	else {
99 		stream_set(str, STREAM_EOS_REACHED);
100 		stream_unset(str, STREAM_PRIMED);
101 		return (PRIME_FAILED_EMPTY_FILE);
102 	}
103 
104 	str->s_current.l_data.wp = current_position;
105 	next_nl = xmemwchar(current_position, L'\n', end_of_buffer -
106 	    current_position);
107 	if (next_nl == NULL) {
108 		warn(WMSG_NEWLINE_ADDED, str->s_filename);
109 		str->s_current.l_data_length = MIN(wslen(current_position),
110 		    end_of_buffer - current_position);
111 	} else {
112 		str->s_current.l_data_length = next_nl - current_position;
113 	}
114 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
115 
116 	str->s_current.l_collate.wp = NULL;
117 	str->s_current.l_collate_length = 0;
118 
119 	__S(stats_incr_fetches());
120 	return (PRIME_SUCCEEDED);
121 }
122 
123 static ssize_t
124 stream_wide_fetch(stream_t *str)
125 {
126 	ssize_t dist_to_buf_end;
127 	int ret_val;
128 	wchar_t *graft_pt;
129 	wchar_t *next_nl;
130 
131 	ASSERT(str->s_status & STREAM_OPEN);
132 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
133 
134 	graft_pt = str->s_current.l_data.wp + str->s_current.l_data_length + 1;
135 
136 	if (shelf == SHELF_VACANT)
137 		str->s_current.l_data.wp = graft_pt;
138 	else if (str->s_current.l_data_length > -1)
139 		graft_pt--;
140 
141 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t) - (graft_pt -
142 	    (wchar_t *)str->s_buffer);
143 
144 	if (dist_to_buf_end <= 1) {
145 		str->s_current.l_data_length = -1;
146 		return (NEXT_LINE_INCOMPLETE);
147 	}
148 
149 	if (fgetws(graft_pt, dist_to_buf_end, str->s_type.BF.s_fp) == NULL) {
150 		if (feof(str->s_type.BF.s_fp))
151 			stream_set(str, STREAM_EOS_REACHED);
152 		else
153 			die(EMSG_READ, str->s_filename);
154 	}
155 
156 	trip_eof(str->s_type.BF.s_fp);
157 	if ((next_nl = xmemwchar(str->s_current.l_data.wp, L'\n',
158 	    dist_to_buf_end)) == NULL) {
159 		str->s_current.l_data_length =
160 		    MIN(wslen(str->s_current.l_data.wp), dist_to_buf_end);
161 	} else {
162 		str->s_current.l_data_length = next_nl -
163 		    str->s_current.l_data.wp;
164 	}
165 
166 	str->s_current.l_collate_length = 0;
167 
168 	if (*(str->s_current.l_data.wp + str->s_current.l_data_length) !=
169 	    L'\n') {
170 		if (!feof(str->s_type.BF.s_fp)) {
171 			if (shelf == SHELF_OCCUPIED)
172 				die(EMSG_MEMORY);
173 
174 			shelf = SHELF_OCCUPIED;
175 			ret_val = NEXT_LINE_INCOMPLETE;
176 			__S(stats_incr_shelves());
177 		} else {
178 			stream_set(str, STREAM_EOS_REACHED);
179 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
180 		}
181 	} else {
182 		shelf = SHELF_VACANT;
183 		ret_val = NEXT_LINE_COMPLETE;
184 		*(str->s_current.l_data.wp + str->s_current.l_data_length) =
185 		    L'\0';
186 		__S(stats_incr_fetches());
187 	}
188 
189 	return (ret_val);
190 }
191 
192 ssize_t
193 stream_wide_fetch_overwrite(stream_t *str)
194 {
195 	ssize_t dist_to_buf_end;
196 
197 	ASSERT(str->s_status & STREAM_OPEN);
198 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
199 
200 	str->s_current.l_data.wp = str->s_buffer;
201 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t);
202 
203 	if (fgetws(str->s_current.l_data.wp, dist_to_buf_end,
204 	    str->s_type.BF.s_fp) == NULL) {
205 		if (feof(str->s_type.BF.s_fp))
206 			stream_set(str, STREAM_EOS_REACHED);
207 		else
208 			die(EMSG_READ, str->s_filename);
209 	}
210 
211 	trip_eof(str->s_type.BF.s_fp);
212 	str->s_current.l_data_length = wslen(str->s_current.l_data.wp) - 1;
213 	str->s_current.l_collate_length = 0;
214 
215 	if (str->s_current.l_data_length == -1 ||
216 	    *(str->s_current.l_data.wp + str->s_current.l_data_length) !=
217 	    L'\n') {
218 		if (!feof(str->s_type.BF.s_fp)) {
219 			die(EMSG_MEMORY);
220 		} else {
221 			stream_set(str, STREAM_EOS_REACHED);
222 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
223 			str->s_current.l_data_length++;
224 		}
225 	}
226 
227 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
228 
229 	__S(stats_incr_fetches());
230 	return (NEXT_LINE_COMPLETE);
231 }
232 
233 static void
234 stream_wide_send_eol(stream_t *str)
235 {
236 	wchar_t w_crlf[2] = { L'\n', L'\0' };
237 
238 	ASSERT(str->s_status & STREAM_OPEN);
239 	ASSERT(str->s_status & STREAM_OUTPUT);
240 
241 	if (wxwrite(str->s_type.SF.s_fd, w_crlf) < 0)
242 		die(EMSG_WRITE, str->s_filename);
243 }
244 
245 static void
246 stream_wide_put_line(stream_t *str, line_rec_t *line)
247 {
248 	ASSERT(str->s_status & STREAM_OPEN);
249 	ASSERT(str->s_status & STREAM_OUTPUT);
250 
251 	if (line->l_data_length >= 0) {
252 		if (wxwrite(str->s_type.SF.s_fd, line->l_data.wp) >= 0) {
253 			stream_wide_send_eol(str);
254 			__S(stats_incr_puts());
255 		} else
256 			die(EMSG_WRITE, str->s_filename);
257 	}
258 	safe_free(line->l_raw_collate.wp);
259 	line->l_raw_collate.wp = NULL;
260 }
261 
262 void
263 stream_wide_put_line_unique(stream_t *str, line_rec_t *line)
264 {
265 	static line_rec_t pvs;
266 	static size_t collate_buf_len;
267 
268 	ASSERT(str->s_status & STREAM_OPEN);
269 	ASSERT(str->s_status & STREAM_OUTPUT);
270 
271 	if ((pvs.l_collate.sp == NULL ||
272 	    collated_wide(&pvs, line, 0, COLL_UNIQUE) != 0) &&
273 	    line->l_data_length >= 0) {
274 		stream_wide_put_line(str, line);
275 
276 		if (line->l_collate_length + sizeof (wchar_t) >
277 		    collate_buf_len) {
278 			pvs.l_collate.sp = safe_realloc(pvs.l_collate.sp,
279 			    line->l_collate_length + sizeof (wchar_t));
280 			collate_buf_len = line->l_collate_length +
281 			    sizeof (wchar_t);
282 		}
283 
284 		(void) memcpy(pvs.l_collate.sp, line->l_collate.sp,
285 		    line->l_collate_length);
286 		/* LINTED ALIGNMENT */
287 		*(wchar_t *)(pvs.l_collate.sp + line->l_collate_length) = L'\0';
288 		pvs.l_collate_length = line->l_collate_length;
289 	}
290 }
291 
292 static int
293 stream_wide_eos(stream_t *str)
294 {
295 	int retval = 0;
296 
297 	if (str == NULL || str->s_status & STREAM_EOS_REACHED)
298 		return (1);
299 
300 	trip_eof(str->s_type.BF.s_fp);
301 	if (feof(str->s_type.BF.s_fp) &&
302 	    shelf == SHELF_VACANT &&
303 	    str->s_current.l_collate_length != -1) {
304 		retval = 1;
305 		stream_set(str, STREAM_EOS_REACHED);
306 	}
307 
308 	return (retval);
309 }
310 
/*
 * stream_wide_release_line() deliberately does nothing; it exists so that
 * stream_wide_ops has an entry for the release-line operation.
 */
/*ARGSUSED*/
static void
stream_wide_release_line(stream_t *str)
{
}
316 
/*
 * Operations vector for wide character streams.  The eos, fetch, prime,
 * put_line, release_line and send_eol entries are the wide character
 * routines defined in this file; every other entry reuses the corresponding
 * stream_stdio_*() routine.  The initializer is positional, so the order of
 * the entries must match the member order of stream_ops_t.
 */
const stream_ops_t stream_wide_ops = {
	stream_stdio_is_closable,
	stream_stdio_close,
	stream_wide_eos,
	stream_wide_fetch,
	stream_stdio_flush,
	stream_stdio_free,
	stream_stdio_open_for_write,
	stream_wide_prime,
	stream_wide_put_line,
	stream_wide_release_line,
	stream_wide_send_eol,
	stream_stdio_unlink
};
331