1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1996, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 /* 28 * wio_get.c 29 * 30 * Wide I/O Library 31 * 32 * Copyright 1990, 1995 by Mortice Kern Systems Inc. All rights reserved. 33 * 34 */ 35 36 #if M_RCSID 37 #ifndef lint 38 static char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $"; 39 #endif 40 #endif 41 42 #include <mks.h> 43 #include <errno.h> 44 #include <m_wio.h> 45 46 #ifdef M_I18N_LOCKING_SHIFT 47 /* 48 * Eat one or more shift-out and/or shift-in bytes. 49 * Return non-zero if an error occured on the stream. 50 * The stream's input state is updated accordingly. 51 * 52 * NOTE this function assumes that the shift-in and 53 * shift-out are bytes. 54 */ 55 static int 56 eat_shift_bytes(wio) 57 t_wide_io *wio; 58 { 59 char mb; 60 int ch, prev; 61 mbstate_t start_state; 62 63 for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) { 64 /* Was it an insignificant shift byte, SI-SI or SO-SO? */ 65 if (ch != prev) { 66 /* First iteration will always enter here looking 67 * for a state change. Subsequent iterations entering 68 * here are trying to identify redundant shifts, which 69 * are SO-SI or SI-SO pairs. 70 */ 71 mb = (char) ch; 72 start_state = wio->_state; 73 74 /* Convert byte and identify a state change. */ 75 if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == -1 76 || mbsinit(&start_state) == mbsinit(&wio->_state)) { 77 /* Encoding error or no state change. */ 78 if (wio->get != (int (*)(int, void *)) 0) 79 (void) (*wio->unget)(ch, wio->object); 80 wio->_state = start_state; 81 break; 82 } 83 } 84 } 85 86 if (wio->iserror != (int (*)(void *)) 0) 87 return !(*wio->iserror)(wio->object); 88 89 return 0; 90 } 91 #endif /* M_I18N_LOCKING_SHIFT */ 92 93 /* 94 * Return a wide character or WEOF for EOF or error. 95 * 96 * The function referenced by "get" is passed the pointer "object" 97 * and returns an input byte or EOF if no further data available. 98 * 99 * This mechanism is used to do conversions of byte strings or 100 * streams into wide characters without loss of information in the 101 * case of a bad multibyte character conversion. The bad multibyte 102 * sequence is passed through as individual bytes. 103 */ 104 wint_t 105 m_wio_get(wio) 106 t_wide_io *wio; 107 { 108 int ch; 109 wchar_t wc; 110 mbstate_t start_state; 111 static mbstate_t initial_state = { 0 }; 112 113 if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) { 114 errno = EINVAL; 115 return -1; 116 } 117 118 /* Do still have bytes available? */ 119 if (wio->_next < wio->_size) 120 return (wint_t) wio->_mb[wio->_next++]; 121 122 /* Read in enough bytes to convert a multibyte character. */ 123 wio->_size = 0; 124 start_state = wio->_state; 125 for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) { 126 if ((ch = (*wio->get)(wio->object)) == EOF) 127 break; 128 129 wio->_mb[wio->_next] = ch; 130 131 /* Attempt to convert multibyte character sequence. */ 132 wio->_size = mbrtowc( 133 &wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state 134 ); 135 136 ++wio->_next; 137 138 if (0 <= wio->_size) { 139 #ifdef M_I18N_LOCKING_SHIFT 140 /* Only eat shift bytes within a line, since in line 141 * canonical mode, attempting to eat shift bytes 142 * following a <newline> causes another read(). 143 */ 144 if (ch != '\n') { 145 /* When a valid character is found, consume 146 * any trailing shift-in or shift-out bytes, 147 * updating the state accordingly. 148 */ 149 (void) eat_shift_bytes(wio); 150 } 151 #endif /* M_I18N_LOCKING_SHIFT */ 152 153 /* Remember the number of bytes converted. */ 154 wio->_size = wio->_next; 155 156 return (wint_t) wc; 157 } 158 } 159 160 /* If we fill the multibyte character buffer or receive an 161 * EOF without recognising a multibyte character, then we 162 * will return individual bytes from the buffer. The buffer 163 * is restored to its state before the bogus byte sequence 164 * was read. 165 */ 166 wio->_state = start_state; 167 wio->_size = wio->_next; 168 wio->_next = 0; 169 170 return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF; 171 } 172 173 174