1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 1996, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27 /*
28 * wio_get.c
29 *
30 * Wide I/O Library
31 *
32 * Copyright 1990, 1995 by Mortice Kern Systems Inc. All rights reserved.
33 *
34 */
35
/*
 * RCS identification string for this file.  It is only compiled in
 * when M_RCSID is non-zero and the source is not being run through lint.
 */
36 #if M_RCSID
37 #ifndef lint
38 static char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $";
39 #endif
40 #endif
41
42 #include <mks.h>
43 #include <errno.h>
44 #include <m_wio.h>
45
46 #ifdef M_I18N_LOCKING_SHIFT
47 /*
48 * Eat one or more shift-out and/or shift-in bytes.
 * Return non-zero if an error occurred on the stream.
50 * The stream's input state is updated accordingly.
51 *
52 * NOTE this function assumes that the shift-in and
53 * shift-out are bytes.
54 */
55 static int
eat_shift_bytes(wio)56 eat_shift_bytes(wio)
57 t_wide_io *wio;
58 {
59 char mb;
60 int ch, prev;
61 mbstate_t start_state;
62
63 for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) {
64 /* Was it an insignificant shift byte, SI-SI or SO-SO? */
65 if (ch != prev) {
66 /* First iteration will always enter here looking
67 * for a state change. Subsequent iterations entering
68 * here are trying to identify redundant shifts, which
69 * are SO-SI or SI-SO pairs.
70 */
71 mb = (char) ch;
72 start_state = wio->_state;
73
74 /* Convert byte and identify a state change. */
75 if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == -1
76 || mbsinit(&start_state) == mbsinit(&wio->_state)) {
77 /* Encoding error or no state change. */
78 if (wio->get != (int (*)(int, void *)) 0)
79 (void) (*wio->unget)(ch, wio->object);
80 wio->_state = start_state;
81 break;
82 }
83 }
84 }
85
86 if (wio->iserror != (int (*)(void *)) 0)
87 return !(*wio->iserror)(wio->object);
88
89 return 0;
90 }
91 #endif /* M_I18N_LOCKING_SHIFT */
92
93 /*
94 * Return a wide character or WEOF for EOF or error.
95 *
96 * The function referenced by "get" is passed the pointer "object"
97 * and returns an input byte or EOF if no further data available.
98 *
99 * This mechanism is used to do conversions of byte strings or
100 * streams into wide characters without loss of information in the
101 * case of a bad multibyte character conversion. The bad multibyte
102 * sequence is passed through as individual bytes.
103 */
104 wint_t
m_wio_get(wio)105 m_wio_get(wio)
106 t_wide_io *wio;
107 {
108 int ch;
109 wchar_t wc;
110 mbstate_t start_state;
111 static mbstate_t initial_state = { 0 };
112
113 if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) {
114 errno = EINVAL;
115 return -1;
116 }
117
118 /* Do still have bytes available? */
119 if (wio->_next < wio->_size)
120 return (wint_t) wio->_mb[wio->_next++];
121
122 /* Read in enough bytes to convert a multibyte character. */
123 wio->_size = 0;
124 start_state = wio->_state;
125 for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) {
126 if ((ch = (*wio->get)(wio->object)) == EOF)
127 break;
128
129 wio->_mb[wio->_next] = ch;
130
131 /* Attempt to convert multibyte character sequence. */
132 wio->_size = mbrtowc(
133 &wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state
134 );
135
136 ++wio->_next;
137
138 if (0 <= wio->_size) {
139 #ifdef M_I18N_LOCKING_SHIFT
140 /* Only eat shift bytes within a line, since in line
141 * canonical mode, attempting to eat shift bytes
142 * following a <newline> causes another read().
143 */
144 if (ch != '\n') {
145 /* When a valid character is found, consume
146 * any trailing shift-in or shift-out bytes,
147 * updating the state accordingly.
148 */
149 (void) eat_shift_bytes(wio);
150 }
151 #endif /* M_I18N_LOCKING_SHIFT */
152
153 /* Remember the number of bytes converted. */
154 wio->_size = wio->_next;
155
156 return (wint_t) wc;
157 }
158 }
159
160 /* If we fill the multibyte character buffer or receive an
161 * EOF without recognising a multibyte character, then we
162 * will return individual bytes from the buffer. The buffer
163 * is restored to its state before the bogus byte sequence
164 * was read.
165 */
166 wio->_state = start_state;
167 wio->_size = wio->_next;
168 wio->_next = 0;
169
170 return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF;
171 }
172
173
174