1 /* 2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Paul Borman at Krystal Technologies. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "lint.h" 36 #include <errno.h> 37 #include <limits.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <sys/types.h> 42 #include "runetype.h" 43 #include "mblocal.h" 44 45 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 46 47 static size_t _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD, 48 const char *_RESTRICT_KYWD, 49 size_t, mbstate_t *_RESTRICT_KYWD); 50 static int _EUC_mbsinit(const mbstate_t *); 51 static size_t _EUC_wcrtomb(char *_RESTRICT_KYWD, wchar_t, 52 mbstate_t *_RESTRICT_KYWD); 53 54 typedef struct { 55 int count[4]; 56 wchar_t bits[4]; 57 wchar_t mask; 58 } _EucInfo; 59 60 typedef struct { 61 wchar_t ch; 62 int set; 63 int want; 64 } _EucState; 65 66 int 67 _EUC_init(_RuneLocale *rl) 68 { 69 _EucInfo *ei; 70 int x, new__mb_cur_max; 71 char *v, *e; 72 73 if (rl->__variable == NULL) 74 return (EINVAL); 75 76 v = (char *)rl->__variable; 77 78 while (*v == ' ' || *v == '\t') 79 ++v; 80 81 if ((ei = malloc(sizeof (_EucInfo))) == NULL) 82 return (errno == 0 ? ENOMEM : errno); 83 84 new__mb_cur_max = 0; 85 for (x = 0; x < 4; ++x) { 86 ei->count[x] = (int)strtol(v, &e, 0); 87 if (v == e || !(v = e)) { 88 free(ei); 89 return (EINVAL); 90 } 91 if (new__mb_cur_max < ei->count[x]) 92 new__mb_cur_max = ei->count[x]; 93 while (*v == ' ' || *v == '\t') 94 ++v; 95 ei->bits[x] = (int)strtol(v, &e, 0); 96 if (v == e || !(v = e)) { 97 free(ei); 98 return (EINVAL); 99 } 100 while (*v == ' ' || *v == '\t') 101 ++v; 102 } 103 ei->mask = (int)strtol(v, &e, 0); 104 if (v == e || !(v = e)) { 105 free(ei); 106 return (EINVAL); 107 } 108 rl->__variable = ei; 109 rl->__variable_len = sizeof (_EucInfo); 110 _CurrentRuneLocale = rl; 111 __ctype[520] = new__mb_cur_max; 112 __mbrtowc = _EUC_mbrtowc; 113 __wcrtomb = _EUC_wcrtomb; 114 __mbsinit = _EUC_mbsinit; 115 charset_is_ascii = 0; 116 return (0); 117 } 118 119 static int 120 _EUC_mbsinit(const mbstate_t *ps) 121 { 122 123 return (ps == NULL || ((const _EucState *)ps)->want == 0); 124 } 125 126 #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) 127 128 #define _SS2 0x008e 129 #define _SS3 0x008f 130 131 #define GR_BITS 0x80808080 /* XXX: to be fixed */ 132 133 static int 134 _euc_set(uint_t c) 135 { 136 137 c &= 0xff; 138 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 139 } 140 141 static size_t 142 _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, 143 size_t n, mbstate_t *_RESTRICT_KYWD ps) 144 { 145 _EucState *es; 146 int i, set, want; 147 wchar_t wc; 148 const char *os; 149 150 es = (_EucState *)ps; 151 152 if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || 153 es->set > 3) { 154 errno = EINVAL; 155 return ((size_t)-1); 156 } 157 158 if (s == NULL) { 159 s = ""; 160 n = 1; 161 pwc = NULL; 162 } 163 164 if (n == 0) 165 /* Incomplete multibyte sequence */ 166 return ((size_t)-2); 167 168 os = s; 169 170 if (es->want == 0) { 171 want = CEI->count[set = _euc_set(*s)]; 172 if (set == 2 || set == 3) { 173 --want; 174 if (--n == 0) { 175 /* Incomplete multibyte sequence */ 176 es->set = set; 177 es->want = want; 178 es->ch = 0; 179 return ((size_t)-2); 180 } 181 ++s; 182 if (*s == '\0') { 183 errno = EILSEQ; 184 return ((size_t)-1); 185 } 186 } 187 wc = (unsigned char)*s++; 188 } else { 189 set = es->set; 190 want = es->want; 191 wc = es->ch; 192 } 193 for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 194 if (*s == '\0') { 195 errno = EILSEQ; 196 return ((size_t)-1); 197 } 198 wc = (wc << 8) | (unsigned char)*s++; 199 } 200 if (i < want) { 201 /* Incomplete multibyte sequence */ 202 es->set = set; 203 es->want = want - i; 204 es->ch = wc; 205 return ((size_t)-2); 206 } 207 wc = (wc & ~CEI->mask) | CEI->bits[set]; 208 if (pwc != NULL) 209 *pwc = wc; 210 es->want = 0; 211 return (wc == L'\0' ? 0 : s - os); 212 } 213 214 static size_t 215 _EUC_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) 216 { 217 _EucState *es; 218 wchar_t m, nm; 219 int i, len; 220 221 es = (_EucState *)ps; 222 223 if (es->want != 0) { 224 errno = EINVAL; 225 return ((size_t)-1); 226 } 227 228 if (s == NULL) 229 /* Reset to initial shift state (no-op) */ 230 return (1); 231 232 m = wc & CEI->mask; 233 nm = wc & ~m; 234 235 if (m == CEI->bits[1]) { 236 CodeSet1: 237 /* Codeset 1: The first byte must have 0x80 in it. */ 238 i = len = CEI->count[1]; 239 while (i-- > 0) { 240 *(unsigned char *)s = (nm >> (i << 3)) | 0x80; 241 s++; 242 } 243 } else { 244 if (m == CEI->bits[0]) 245 i = len = CEI->count[0]; 246 else if (m == CEI->bits[2]) { 247 i = len = CEI->count[2]; 248 *(unsigned char *)s = _SS2; 249 s++; 250 --i; 251 /* SS2 designates G2 into GR */ 252 nm |= GR_BITS; 253 } else if (m == CEI->bits[3]) { 254 i = len = CEI->count[3]; 255 *(unsigned char *)s = _SS3; 256 s++; 257 --i; 258 /* SS3 designates G3 into GR */ 259 nm |= GR_BITS; 260 } else 261 goto CodeSet1; /* Bletch */ 262 while (i-- > 0) 263 *s++ = (nm >> (i << 3)) & 0xff; 264 } 265 return (len); 266 } 267