xref: /titanic_52/usr/src/cmd/sort/common/streams_wide.c (revision bc37da3aa8455efcf567c456746e3fb9d7f0a189)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "streams_wide.h"
29 #include "streams_common.h"
30 
/*
 * Size, in bytes, of the buffer that stream_wide_prime() maps for a
 * STREAM_INSTANT stream that does not yet have one.
 */
#define	WIDE_VBUF_SIZE	(64 * KILOBYTE)

/*
 * "Shelf" state, shared by every wide stream in this file.  The shelf is
 * marked occupied by stream_wide_fetch() when only part of a line could be
 * read and end-of-file was not reached; it is marked vacant again once a
 * fetch ends on a newline.
 */
#define	SHELF_OCCUPIED	1
#define	SHELF_VACANT	0
static int shelf = SHELF_VACANT;
36 
37 /*
38  * Wide character streams implementation
39  *
40  *   The wide character streams implementation is, for the most part, a
41  *   reimplementation of the stdio streams implementation, using wide character
42  *   string routines.  However, fgetws(3C) retains the newline that fgets(3C)
43  *   discards while reading a complete line.  As a result, the wide character
44  *   routines need to guard against coincidental exhaustion of the buffer, as
45  *   well as overwriting the end-of-line character and correcting the
46  *   l_data_length field.
47  */
48 
49 static int
50 stream_wide_prime(stream_t *str)
51 {
52 	stream_buffered_file_t *BF = &(str->s_type.BF);
53 	wchar_t *current_position;
54 	wchar_t *end_of_buffer;
55 	wchar_t *next_nl;
56 
57 	ASSERT(!(str->s_status & STREAM_OUTPUT));
58 	ASSERT(str->s_status & STREAM_OPEN);
59 
60 	if (str->s_status & STREAM_INSTANT && (str->s_buffer == NULL)) {
61 		str->s_buffer = xzmap(0, WIDE_VBUF_SIZE, PROT_READ |
62 		    PROT_WRITE, MAP_PRIVATE, 0);
63 		if (str->s_buffer == MAP_FAILED)
64 			die(EMSG_MMAP);
65 		str->s_buffer_size = WIDE_VBUF_SIZE;
66 	}
67 
68 	ASSERT(str->s_buffer != NULL);
69 
70 	if (stream_is_primed(str)) {
71 		int shelf_state = shelf;
72 
73 		ASSERT(str->s_current.l_data_length >= -1);
74 		(void) memcpy(str->s_buffer, str->s_current.l_data.wp,
75 		    (str->s_current.l_data_length + 1) * sizeof (wchar_t));
76 		str->s_current.l_data.wp = str->s_buffer;
77 
78 		if ((str->s_current.l_data_length == -1 ||
79 		    shelf_state == SHELF_OCCUPIED ||
80 		    *(str->s_current.l_data.wp +
81 		    str->s_current.l_data_length) != L'\0') &&
82 		    SOP_FETCH(str) == NEXT_LINE_INCOMPLETE &&
83 		    shelf_state == SHELF_OCCUPIED)
84 			die(EMSG_MEMORY);
85 
86 		return (PRIME_SUCCEEDED);
87 	}
88 
89 	stream_set(str, STREAM_PRIMED);
90 
91 	current_position = (wchar_t *)str->s_buffer;
92 	/*LINTED ALIGNMENT*/
93 	end_of_buffer = (wchar_t *)((char *)str->s_buffer +
94 	    str->s_buffer_size);
95 
96 	trip_eof(BF->s_fp);
97 	if (!feof(BF->s_fp))
98 		(void) fgetws(current_position, end_of_buffer
99 		    - current_position, BF->s_fp);
100 	else {
101 		stream_set(str, STREAM_EOS_REACHED);
102 		stream_unset(str, STREAM_PRIMED);
103 		return (PRIME_FAILED_EMPTY_FILE);
104 	}
105 
106 	str->s_current.l_data.wp = current_position;
107 	next_nl = xmemwchar(current_position, L'\n', end_of_buffer -
108 	    current_position);
109 	if (next_nl == NULL) {
110 		warn(WMSG_NEWLINE_ADDED, str->s_filename);
111 		str->s_current.l_data_length = MIN(wslen(current_position),
112 		    end_of_buffer - current_position);
113 	} else {
114 		str->s_current.l_data_length = next_nl - current_position;
115 	}
116 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
117 
118 	str->s_current.l_collate.wp = NULL;
119 	str->s_current.l_collate_length = 0;
120 
121 	__S(stats_incr_fetches());
122 	return (PRIME_SUCCEEDED);
123 }
124 
125 static ssize_t
126 stream_wide_fetch(stream_t *str)
127 {
128 	ssize_t dist_to_buf_end;
129 	int ret_val;
130 	wchar_t *graft_pt;
131 	wchar_t *next_nl;
132 
133 	ASSERT(str->s_status & STREAM_OPEN);
134 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
135 
136 	graft_pt = str->s_current.l_data.wp + str->s_current.l_data_length + 1;
137 
138 	if (shelf == SHELF_VACANT)
139 		str->s_current.l_data.wp = graft_pt;
140 	else if (str->s_current.l_data_length > -1)
141 		graft_pt--;
142 
143 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t) - (graft_pt -
144 	    (wchar_t *)str->s_buffer);
145 
146 	if (dist_to_buf_end <= 1) {
147 		str->s_current.l_data_length = -1;
148 		return (NEXT_LINE_INCOMPLETE);
149 	}
150 
151 	if (fgetws(graft_pt, dist_to_buf_end, str->s_type.BF.s_fp) == NULL) {
152 		if (feof(str->s_type.BF.s_fp))
153 			stream_set(str, STREAM_EOS_REACHED);
154 		else
155 			die(EMSG_READ, str->s_filename);
156 	}
157 
158 	trip_eof(str->s_type.BF.s_fp);
159 	if ((next_nl = xmemwchar(str->s_current.l_data.wp, L'\n',
160 	    dist_to_buf_end)) == NULL) {
161 		str->s_current.l_data_length =
162 		    MIN(wslen(str->s_current.l_data.wp), dist_to_buf_end);
163 	} else {
164 		str->s_current.l_data_length = next_nl -
165 		    str->s_current.l_data.wp;
166 	}
167 
168 	str->s_current.l_collate_length = 0;
169 
170 	if (*(str->s_current.l_data.wp + str->s_current.l_data_length) !=
171 	    L'\n') {
172 		if (!feof(str->s_type.BF.s_fp)) {
173 			if (shelf == SHELF_OCCUPIED)
174 				die(EMSG_MEMORY);
175 
176 			shelf = SHELF_OCCUPIED;
177 			ret_val = NEXT_LINE_INCOMPLETE;
178 			__S(stats_incr_shelves());
179 		} else {
180 			stream_set(str, STREAM_EOS_REACHED);
181 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
182 		}
183 	} else {
184 		shelf = SHELF_VACANT;
185 		ret_val = NEXT_LINE_COMPLETE;
186 		*(str->s_current.l_data.wp + str->s_current.l_data_length) =
187 		    L'\0';
188 		__S(stats_incr_fetches());
189 	}
190 
191 	return (ret_val);
192 }
193 
194 ssize_t
195 stream_wide_fetch_overwrite(stream_t *str)
196 {
197 	ssize_t dist_to_buf_end;
198 
199 	ASSERT(str->s_status & STREAM_OPEN);
200 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
201 
202 	str->s_current.l_data.wp = str->s_buffer;
203 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t);
204 
205 	if (fgetws(str->s_current.l_data.wp, dist_to_buf_end,
206 	    str->s_type.BF.s_fp) == NULL) {
207 		if (feof(str->s_type.BF.s_fp))
208 			stream_set(str, STREAM_EOS_REACHED);
209 		else
210 			die(EMSG_READ, str->s_filename);
211 	}
212 
213 	trip_eof(str->s_type.BF.s_fp);
214 	str->s_current.l_data_length = wslen(str->s_current.l_data.wp) - 1;
215 	str->s_current.l_collate_length = 0;
216 
217 	if (str->s_current.l_data_length == -1 ||
218 	    *(str->s_current.l_data.wp + str->s_current.l_data_length) !=
219 	    L'\n') {
220 		if (!feof(str->s_type.BF.s_fp)) {
221 			die(EMSG_MEMORY);
222 		} else {
223 			stream_set(str, STREAM_EOS_REACHED);
224 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
225 			str->s_current.l_data_length++;
226 		}
227 	}
228 
229 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
230 
231 	__S(stats_incr_fetches());
232 	return (NEXT_LINE_COMPLETE);
233 }
234 
235 static void
236 stream_wide_send_eol(stream_t *str)
237 {
238 	wchar_t w_crlf[2] = { L'\n', L'\0' };
239 
240 	ASSERT(str->s_status & STREAM_OPEN);
241 	ASSERT(str->s_status & STREAM_OUTPUT);
242 
243 	if (wxwrite(str->s_type.SF.s_fd, w_crlf) < 0)
244 		die(EMSG_WRITE, str->s_filename);
245 }
246 
247 static void
248 stream_wide_put_line(stream_t *str, line_rec_t *line)
249 {
250 	ASSERT(str->s_status & STREAM_OPEN);
251 	ASSERT(str->s_status & STREAM_OUTPUT);
252 
253 	if (line->l_data_length >= 0) {
254 		if (wxwrite(str->s_type.SF.s_fd, line->l_data.wp) >= 0) {
255 			stream_wide_send_eol(str);
256 			__S(stats_incr_puts());
257 		} else
258 			die(EMSG_WRITE, str->s_filename);
259 	}
260 	safe_free(line->l_raw_collate.wp);
261 	line->l_raw_collate.wp = NULL;
262 }
263 
264 void
265 stream_wide_put_line_unique(stream_t *str, line_rec_t *line)
266 {
267 	static line_rec_t pvs;
268 	static size_t collate_buf_len;
269 
270 	ASSERT(str->s_status & STREAM_OPEN);
271 	ASSERT(str->s_status & STREAM_OUTPUT);
272 
273 	if ((pvs.l_collate.sp == NULL ||
274 	    collated_wide(&pvs, line, 0, COLL_UNIQUE) != 0) &&
275 	    line->l_data_length >= 0) {
276 		stream_wide_put_line(str, line);
277 
278 		if (line->l_collate_length + sizeof (wchar_t) >
279 		    collate_buf_len) {
280 			pvs.l_collate.sp = safe_realloc(pvs.l_collate.sp,
281 			    line->l_collate_length + sizeof (wchar_t));
282 			collate_buf_len = line->l_collate_length +
283 			    sizeof (wchar_t);
284 		}
285 
286 		(void) memcpy(pvs.l_collate.sp, line->l_collate.sp,
287 		    line->l_collate_length);
288 		/* LINTED ALIGNMENT */
289 		*(wchar_t *)(pvs.l_collate.sp + line->l_collate_length) = L'\0';
290 		pvs.l_collate_length = line->l_collate_length;
291 	}
292 }
293 
294 static int
295 stream_wide_eos(stream_t *str)
296 {
297 	int retval = 0;
298 
299 	if (str == NULL || str->s_status & STREAM_EOS_REACHED)
300 		return (1);
301 
302 	trip_eof(str->s_type.BF.s_fp);
303 	if (feof(str->s_type.BF.s_fp) &&
304 	    shelf == SHELF_VACANT &&
305 	    str->s_current.l_collate_length != -1) {
306 		retval = 1;
307 		stream_set(str, STREAM_EOS_REACHED);
308 	}
309 
310 	return (retval);
311 }
312 
/*
 * stream_wide_release_line() is the release-line entry installed in
 * stream_wide_ops.  It is intentionally empty: this implementation does
 * nothing when a line is released.  The ARGSUSED directive below tells lint
 * that the unused argument is deliberate.
 */
/*ARGSUSED*/
static void
stream_wide_release_line(stream_t *str)
{
}
318 
/*
 * Operations vector for wide character streams.  The initializers are
 * positional, so their order must match the member order of stream_ops_t.
 * Entries named stream_wide_* are the routines defined in this file; the
 * stream_stdio_* entries are not defined here and are reused unchanged.
 */
const stream_ops_t stream_wide_ops = {
	stream_stdio_is_closable,
	stream_stdio_close,
	stream_wide_eos,
	stream_wide_fetch,
	stream_stdio_flush,
	stream_stdio_free,
	stream_stdio_open_for_write,
	stream_wide_prime,
	stream_wide_put_line,
	stream_wide_release_line,
	stream_wide_send_eol,
	stream_stdio_unlink
};
333