1 /* 2 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 40 * Use is subject to license terms. 41 */ 42 43 #include "lint.h" 44 #include <errno.h> 45 #include <limits.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <wchar.h> 49 #include <sys/types.h> 50 #include "runetype.h" 51 #include "mblocal.h" 52 53 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 54 55 static size_t _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD, 56 const char *_RESTRICT_KYWD, 57 size_t, mbstate_t *_RESTRICT_KYWD); 58 static int _EUC_mbsinit(const mbstate_t *); 59 static size_t _EUC_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 60 mbstate_t *_RESTRICT_KYWD); 61 62 typedef struct { 63 int count[4]; 64 wchar_t bits[4]; 65 wchar_t mask; 66 } _EucInfo; 67 68 typedef struct { 69 wchar_t ch; 70 int set; 71 int want; 72 } _EucState; 73 74 int 75 _EUC_init(_RuneLocale *rl) 76 { 77 _EucInfo *ei; 78 int x, new__mb_cur_max; 79 char *v, *e; 80 81 if (rl->__variable == NULL) 82 return (EINVAL); 83 84 v = (char *)rl->__variable; 85 86 while (*v == ' ' || *v == '\t') 87 ++v; 88 89 if ((ei = malloc(sizeof (_EucInfo))) == NULL) 90 return (errno == 0 ? ENOMEM : errno); 91 92 new__mb_cur_max = 0; 93 for (x = 0; x < 4; ++x) { 94 ei->count[x] = (int)strtol(v, &e, 0); 95 if (v == e || !(v = e)) { 96 free(ei); 97 return (EINVAL); 98 } 99 if (new__mb_cur_max < ei->count[x]) 100 new__mb_cur_max = ei->count[x]; 101 while (*v == ' ' || *v == '\t') 102 ++v; 103 ei->bits[x] = (int)strtol(v, &e, 0); 104 if (v == e || !(v = e)) { 105 free(ei); 106 return (EINVAL); 107 } 108 while (*v == ' ' || *v == '\t') 109 ++v; 110 } 111 ei->mask = (int)strtol(v, &e, 0); 112 if (v == e || !(v = e)) { 113 free(ei); 114 return (EINVAL); 115 } 116 rl->__variable = ei; 117 rl->__variable_len = sizeof (_EucInfo); 118 _CurrentRuneLocale = rl; 119 __ctype[520] = new__mb_cur_max; 120 __mbrtowc = _EUC_mbrtowc; 121 __wcrtomb = _EUC_wcrtomb; 122 __mbsinit = _EUC_mbsinit; 123 charset_is_ascii = 0; 124 return (0); 125 } 126 127 static int 128 _EUC_mbsinit(const mbstate_t *ps) 129 { 130 131 return (ps == NULL || ((const _EucState *)ps)->want == 0); 132 } 133 134 #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) 135 136 #define _SS2 0x008e 137 #define _SS3 0x008f 138 139 #define GR_BITS 0x80808080 /* XXX: to be fixed */ 140 141 static int 142 _euc_set(uint_t c) 143 { 144 145 c &= 0xff; 146 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 147 } 148 149 static size_t 150 _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 151 size_t n, mbstate_t *_RESTRICT_KYWD ps) 152 { 153 _EucState *es; 154 int i, set, want; 155 wchar_t wc; 156 const char *os; 157 158 es = (_EucState *)ps; 159 160 if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || 161 es->set > 3) { 162 errno = EINVAL; 163 return ((size_t)-1); 164 } 165 166 if (s == NULL) { 167 s = ""; 168 n = 1; 169 pwc = NULL; 170 } 171 172 if (n == 0) 173 /* Incomplete multibyte sequence */ 174 return ((size_t)-2); 175 176 os = s; 177 178 if (es->want == 0) { 179 want = CEI->count[set = _euc_set(*s)]; 180 if (set == 2 || set == 3) { 181 --want; 182 if (--n == 0) { 183 /* Incomplete multibyte sequence */ 184 es->set = set; 185 es->want = want; 186 es->ch = 0; 187 return ((size_t)-2); 188 } 189 ++s; 190 if (*s == '\0') { 191 errno = EILSEQ; 192 return ((size_t)-1); 193 } 194 } 195 wc = (unsigned char)*s++; 196 } else { 197 set = es->set; 198 want = es->want; 199 wc = es->ch; 200 } 201 for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 202 if (*s == '\0') { 203 errno = EILSEQ; 204 return ((size_t)-1); 205 } 206 wc = (wc << 8) | (unsigned char)*s++; 207 } 208 if (i < want) { 209 /* Incomplete multibyte sequence */ 210 es->set = set; 211 es->want = want - i; 212 es->ch = wc; 213 return ((size_t)-2); 214 } 215 wc = (wc & ~CEI->mask) | CEI->bits[set]; 216 if (pwc != NULL) 217 *pwc = wc; 218 es->want = 0; 219 return (wc == L'\0' ? 0 : s - os); 220 } 221 222 static size_t 223 _EUC_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) 224 { 225 _EucState *es; 226 wchar_t m, nm; 227 int i, len; 228 229 es = (_EucState *)ps; 230 231 if (es->want != 0) { 232 errno = EINVAL; 233 return ((size_t)-1); 234 } 235 236 if (s == NULL) 237 /* Reset to initial shift state (no-op) */ 238 return (1); 239 240 m = wc & CEI->mask; 241 nm = wc & ~m; 242 243 if (m == CEI->bits[1]) { 244 CodeSet1: 245 /* Codeset 1: The first byte must have 0x80 in it. */ 246 i = len = CEI->count[1]; 247 while (i-- > 0) { 248 *(unsigned char *)s = (nm >> (i << 3)) | 0x80; 249 s++; 250 } 251 } else { 252 if (m == CEI->bits[0]) 253 i = len = CEI->count[0]; 254 else if (m == CEI->bits[2]) { 255 i = len = CEI->count[2]; 256 *(unsigned char *)s = _SS2; 257 s++; 258 --i; 259 /* SS2 designates G2 into GR */ 260 nm |= GR_BITS; 261 } else if (m == CEI->bits[3]) { 262 i = len = CEI->count[3]; 263 *(unsigned char *)s = _SS3; 264 s++; 265 --i; 266 /* SS3 designates G3 into GR */ 267 nm |= GR_BITS; 268 } else 269 goto CodeSet1; /* Bletch */ 270 while (i-- > 0) 271 *s++ = (nm >> (i << 3)) & 0xff; 272 } 273 return (len); 274 } 275