1 /* 2 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 40 * Use is subject to license terms. 41 */ 42 43 #include "lint.h" 44 #include <errno.h> 45 #include <limits.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <wchar.h> 49 #include <sys/types.h> 50 #include "runetype.h" 51 #include "mblocal.h" 52 53 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 54 55 static size_t _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD, 56 const char *_RESTRICT_KYWD, 57 size_t, mbstate_t *_RESTRICT_KYWD); 58 static int _EUC_mbsinit(const mbstate_t *); 59 static size_t _EUC_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 60 mbstate_t *_RESTRICT_KYWD); 61 62 typedef struct { 63 int count[4]; 64 wchar_t bits[4]; 65 wchar_t mask; 66 } _EucInfo; 67 68 typedef struct { 69 wchar_t ch; 70 int set; 71 int want; 72 } _EucState; 73 74 int 75 _EUC_init(_RuneLocale *rl) 76 { 77 _EucInfo *ei; 78 int x, new__mb_cur_max; 79 char *v, *e; 80 81 if (rl->__variable == NULL) 82 return (EINVAL); 83 84 v = (char *)rl->__variable; 85 86 while (*v == ' ' || *v == '\t') 87 ++v; 88 89 if ((ei = malloc(sizeof (_EucInfo))) == NULL) 90 return (errno == 0 ? ENOMEM : errno); 91 92 new__mb_cur_max = 0; 93 for (x = 0; x < 4; ++x) { 94 ei->count[x] = (int)strtol(v, &e, 0); 95 if (v == e || !(v = e)) { 96 free(ei); 97 return (EINVAL); 98 } 99 if (new__mb_cur_max < ei->count[x]) 100 new__mb_cur_max = ei->count[x]; 101 while (*v == ' ' || *v == '\t') 102 ++v; 103 ei->bits[x] = (int)strtol(v, &e, 0); 104 if (v == e || !(v = e)) { 105 free(ei); 106 return (EINVAL); 107 } 108 while (*v == ' ' || *v == '\t') 109 ++v; 110 } 111 ei->mask = (int)strtol(v, &e, 0); 112 if (v == e || !(v = e)) { 113 free(ei); 114 return (EINVAL); 115 } 116 rl->__variable = ei; 117 rl->__variable_len = sizeof (_EucInfo); 118 _CurrentRuneLocale = rl; 119 __ctype[520] = new__mb_cur_max; 120 __mbrtowc = _EUC_mbrtowc; 121 __wcrtomb = _EUC_wcrtomb; 122 __mbsinit = _EUC_mbsinit; 123 return (0); 124 } 125 126 static int 127 _EUC_mbsinit(const mbstate_t *ps) 128 { 129 130 return (ps == NULL || ((const _EucState *)ps)->want == 0); 131 } 132 133 #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) 134 135 #define _SS2 0x008e 136 #define _SS3 0x008f 137 138 #define GR_BITS 0x80808080 /* XXX: to be fixed */ 139 140 static int 141 _euc_set(uint_t c) 142 { 143 144 c &= 0xff; 145 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 146 } 147 148 static size_t 149 _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 150 size_t n, mbstate_t *_RESTRICT_KYWD ps) 151 { 152 _EucState *es; 153 int i, set, want; 154 wchar_t wc; 155 const char *os; 156 157 es = (_EucState *)ps; 158 159 if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || 160 es->set > 3) { 161 errno = EINVAL; 162 return ((size_t)-1); 163 } 164 165 if (s == NULL) { 166 s = ""; 167 n = 1; 168 pwc = NULL; 169 } 170 171 if (n == 0) 172 /* Incomplete multibyte sequence */ 173 return ((size_t)-2); 174 175 os = s; 176 177 if (es->want == 0) { 178 want = CEI->count[set = _euc_set(*s)]; 179 if (set == 2 || set == 3) { 180 --want; 181 if (--n == 0) { 182 /* Incomplete multibyte sequence */ 183 es->set = set; 184 es->want = want; 185 es->ch = 0; 186 return ((size_t)-2); 187 } 188 ++s; 189 if (*s == '\0') { 190 errno = EILSEQ; 191 return ((size_t)-1); 192 } 193 } 194 wc = (unsigned char)*s++; 195 } else { 196 set = es->set; 197 want = es->want; 198 wc = es->ch; 199 } 200 for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 201 if (*s == '\0') { 202 errno = EILSEQ; 203 return ((size_t)-1); 204 } 205 wc = (wc << 8) | (unsigned char)*s++; 206 } 207 if (i < want) { 208 /* Incomplete multibyte sequence */ 209 es->set = set; 210 es->want = want - i; 211 es->ch = wc; 212 return ((size_t)-2); 213 } 214 wc = (wc & ~CEI->mask) | CEI->bits[set]; 215 if (pwc != NULL) 216 *pwc = wc; 217 es->want = 0; 218 return (wc == L'\0' ? 0 : s - os); 219 } 220 221 static size_t 222 _EUC_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) 223 { 224 _EucState *es; 225 wchar_t m, nm; 226 int i, len; 227 228 es = (_EucState *)ps; 229 230 if (es->want != 0) { 231 errno = EINVAL; 232 return ((size_t)-1); 233 } 234 235 if (s == NULL) 236 /* Reset to initial shift state (no-op) */ 237 return (1); 238 239 m = wc & CEI->mask; 240 nm = wc & ~m; 241 242 if (m == CEI->bits[1]) { 243 CodeSet1: 244 /* Codeset 1: The first byte must have 0x80 in it. */ 245 i = len = CEI->count[1]; 246 while (i-- > 0) { 247 *(unsigned char *)s = (nm >> (i << 3)) | 0x80; 248 s++; 249 } 250 } else { 251 if (m == CEI->bits[0]) 252 i = len = CEI->count[0]; 253 else if (m == CEI->bits[2]) { 254 i = len = CEI->count[2]; 255 *(unsigned char *)s = _SS2; 256 s++; 257 --i; 258 /* SS2 designates G2 into GR */ 259 nm |= GR_BITS; 260 } else if (m == CEI->bits[3]) { 261 i = len = CEI->count[3]; 262 *(unsigned char *)s = _SS3; 263 s++; 264 --i; 265 /* SS3 designates G3 into GR */ 266 nm |= GR_BITS; 267 } else 268 goto CodeSet1; /* Bletch */ 269 while (i-- > 0) 270 *s++ = (nm >> (i << 3)) & 0xff; 271 } 272 return (len); 273 } 274