1 /*- 2 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Copyright (c) 2011 The FreeBSD Foundation 10 * All rights reserved. 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 */ 42 43 #if defined(LIBC_SCCS) && !defined(lint) 44 static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93"; 45 #endif /* LIBC_SCCS and not lint */ 46 #include <sys/param.h> 47 __FBSDID("$FreeBSD$"); 48 49 #include <errno.h> 50 #include <limits.h> 51 #include <runetype.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <wchar.h> 55 #include "mblocal.h" 56 57 extern int __mb_sb_limit; 58 59 static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, 60 size_t, mbstate_t * __restrict); 61 static int _EUC_mbsinit(const mbstate_t *); 62 static size_t _EUC_wcrtomb(char * __restrict, wchar_t, 63 mbstate_t * __restrict); 64 65 typedef struct { 66 int count[4]; 67 wchar_t bits[4]; 68 wchar_t mask; 69 } _EucInfo; 70 71 typedef struct { 72 wchar_t ch; 73 int set; 74 int want; 75 } _EucState; 76 77 int 78 _EUC_init(struct xlocale_ctype *l, _RuneLocale *rl) 79 { 80 _EucInfo *ei; 81 int x, new__mb_cur_max; 82 char *v, *e; 83 84 if (rl->__variable == NULL) 85 return (EFTYPE); 86 87 v = (char *)rl->__variable; 88 89 while (*v == ' ' || *v == '\t') 90 ++v; 91 92 if ((ei = malloc(sizeof(_EucInfo))) == NULL) 93 return (errno == 0 ? ENOMEM : errno); 94 95 new__mb_cur_max = 0; 96 for (x = 0; x < 4; ++x) { 97 ei->count[x] = (int)strtol(v, &e, 0); 98 if (v == e || !(v = e)) { 99 free(ei); 100 return (EFTYPE); 101 } 102 if (new__mb_cur_max < ei->count[x]) 103 new__mb_cur_max = ei->count[x]; 104 while (*v == ' ' || *v == '\t') 105 ++v; 106 ei->bits[x] = (int)strtol(v, &e, 0); 107 if (v == e || !(v = e)) { 108 free(ei); 109 return (EFTYPE); 110 } 111 while (*v == ' ' || *v == '\t') 112 ++v; 113 } 114 ei->mask = (int)strtol(v, &e, 0); 115 if (v == e || !(v = e)) { 116 free(ei); 117 return (EFTYPE); 118 } 119 rl->__variable = ei; 120 rl->__variable_len = sizeof(_EucInfo); 121 l->runes = rl; 122 l->__mb_cur_max = new__mb_cur_max; 123 l->__mbrtowc = _EUC_mbrtowc; 124 l->__wcrtomb = _EUC_wcrtomb; 125 l->__mbsinit = _EUC_mbsinit; 126 l->__mb_sb_limit = 256; 127 return (0); 128 } 129 130 static int 131 _EUC_mbsinit(const mbstate_t *ps) 132 { 133 134 return (ps == NULL || ((const _EucState *)ps)->want == 0); 135 } 136 137 #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) 138 139 #define _SS2 0x008e 140 #define _SS3 0x008f 141 142 #define GR_BITS 0x80808080 /* XXX: to be fixed */ 143 144 static __inline int 145 _euc_set(u_int c) 146 { 147 148 c &= 0xff; 149 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 150 } 151 152 static size_t 153 _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, 154 mbstate_t * __restrict ps) 155 { 156 _EucState *es; 157 int i, set, want; 158 wchar_t wc; 159 const char *os; 160 161 es = (_EucState *)ps; 162 163 if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || 164 es->set > 3) { 165 errno = EINVAL; 166 return ((size_t)-1); 167 } 168 169 if (s == NULL) { 170 s = ""; 171 n = 1; 172 pwc = NULL; 173 } 174 175 if (n == 0) 176 /* Incomplete multibyte sequence */ 177 return ((size_t)-2); 178 179 os = s; 180 181 if (es->want == 0) { 182 want = CEI->count[set = _euc_set(*s)]; 183 if (set == 2 || set == 3) { 184 --want; 185 if (--n == 0) { 186 /* Incomplete multibyte sequence */ 187 es->set = set; 188 es->want = want; 189 es->ch = 0; 190 return ((size_t)-2); 191 } 192 ++s; 193 if (*s == '\0') { 194 errno = EILSEQ; 195 return ((size_t)-1); 196 } 197 } 198 wc = (unsigned char)*s++; 199 } else { 200 set = es->set; 201 want = es->want; 202 wc = es->ch; 203 } 204 for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 205 if (*s == '\0') { 206 errno = EILSEQ; 207 return ((size_t)-1); 208 } 209 wc = (wc << 8) | (unsigned char)*s++; 210 } 211 if (i < want) { 212 /* Incomplete multibyte sequence */ 213 es->set = set; 214 es->want = want - i; 215 es->ch = wc; 216 return ((size_t)-2); 217 } 218 wc = (wc & ~CEI->mask) | CEI->bits[set]; 219 if (pwc != NULL) 220 *pwc = wc; 221 es->want = 0; 222 return (wc == L'\0' ? 0 : s - os); 223 } 224 225 static size_t 226 _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) 227 { 228 _EucState *es; 229 wchar_t m, nm; 230 int i, len; 231 232 es = (_EucState *)ps; 233 234 if (es->want != 0) { 235 errno = EINVAL; 236 return ((size_t)-1); 237 } 238 239 if (s == NULL) 240 /* Reset to initial shift state (no-op) */ 241 return (1); 242 243 m = wc & CEI->mask; 244 nm = wc & ~m; 245 246 if (m == CEI->bits[1]) { 247 CodeSet1: 248 /* Codeset 1: The first byte must have 0x80 in it. */ 249 i = len = CEI->count[1]; 250 while (i-- > 0) 251 *s++ = (nm >> (i << 3)) | 0x80; 252 } else { 253 if (m == CEI->bits[0]) 254 i = len = CEI->count[0]; 255 else if (m == CEI->bits[2]) { 256 i = len = CEI->count[2]; 257 *s++ = _SS2; 258 --i; 259 /* SS2 designates G2 into GR */ 260 nm |= GR_BITS; 261 } else if (m == CEI->bits[3]) { 262 i = len = CEI->count[3]; 263 *s++ = _SS3; 264 --i; 265 /* SS3 designates G3 into GR */ 266 nm |= GR_BITS; 267 } else 268 goto CodeSet1; /* Bletch */ 269 while (i-- > 0) 270 *s++ = (nm >> (i << 3)) & 0xff; 271 } 272 return (len); 273 } 274