/* $FreeBSD$ */
/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */

/*-
 * Copyright (c)2006 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * UES encoding module.
 *
 * Wide characters are represented in the multibyte stream either as a
 * single raw byte or as a backslash escape followed by hexadecimal digits:
 * a backslash, 'u' and 4 hex digits (6 bytes), or a backslash, 'U' and
 * 8 hex digits (10 bytes).  The 10-byte form is only produced/accepted
 * when MODE_C99 is set; without MODE_C99, values above 0xFFFF are written
 * as two consecutive 6-byte escapes holding a surrogate pair (12 bytes).
 */

#include <sys/cdefs.h>

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_bcs.h"
#include "citrus_module.h"
#include "citrus_stdenc.h"
#include "citrus_ues.h"

/*
 * Per-encoding configuration, filled in by
 * _citrus_UES_encoding_module_init().
 */
typedef struct {
	size_t	 mb_cur_max;	/* 10 when MODE_C99 is set, otherwise 12 */
	int	 mode;		/* bit mask of MODE_* flags */
#define MODE_C99	1
} _UESEncodingInfo;

/*
 * Conversion state: bytes of a not-yet-resolved sequence.
 */
typedef struct {
	int	 chlen;		/* number of valid bytes in ch[] */
	char	 ch[12];	/* 12 = two 6-byte escapes (surrogate pair) */
} _UESState;

#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and knobs below are not referenced elsewhere in this file;
 * presumably they are consumed by citrus_stdenc_template.h, which is
 * included at the bottom -- confirm against that header.
 */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0

/*
 * Reset the conversion state: discard any buffered bytes.
 */
static __inline void
/*ARGSUSED*/
_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc)
{

	psenc->chlen = 0;
}

/*
 * Copy the whole state structure into the opaque buffer pspriv.
 * pspriv must have room for sizeof(_UESState) bytes.
 */
static __inline void
/*ARGSUSED*/
_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _UESState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

/*
 * Inverse of _citrus_UES_pack_state(): restore the state structure from
 * the opaque buffer pspriv.
 */
static __inline void
/*ARGSUSED*/
_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}

/*
 * Convert one hexadecimal digit character (either case) to its value.
 * Returns 0..15, or -1 if ch is not a hexadecimal digit.
 */
static __inline int
to_int(int ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'A' && ch <= 'F')
		return ((ch - 'A') + 10);
	else if (ch >= 'a' && ch <= 'f')
		return ((ch - 'a') + 10);
	return (-1);
}

/* Escape introducer and the letters selecting the 4- or 8-digit form. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'
#define UCS4_ESC	'U'

/* Number of value bits carried by each escape form (4 bits per digit). */
#define UCS2_BIT	16
#define UCS4_BIT	32
/* Upper bounds used by the encoder/decoder range checks. */
#define BMP_MAX		UINT32_C(0xFFFF)
#define UCS2_MAX	UINT32_C(0x10FFFF)
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)

/* Digits used when writing escapes; output is always lower case. */
static const char *xdig = "0123456789abcdef";

/*
 * Write the escape sequence for wc into s.
 *
 * bit must be UCS2_BIT (ESCAPE, 'u', 4 digits) or UCS4_BIT (ESCAPE, 'U',
 * 8 digits); any other value calls abort().  Digits are emitted most
 * significant nibble first.  No terminating NUL is written.
 *
 * Returns the number of bytes stored: 6 for UCS2_BIT, 10 for UCS4_BIT.
 */
static __inline int
to_str(char *s, wchar_t wc, int bit)
{
	char *p;

	p = s;
	*p++ = ESCAPE;
	switch (bit) {
	case UCS2_BIT:
		*p++ = UCS2_ESC;
		break;
	case UCS4_BIT:
		*p++ = UCS4_ESC;
		break;
	default:
		abort();
	}
	/* bit counts down by 4 per digit and reaches 0 after the last one. */
	do {
		*p++ = xdig[(wc >> (bit -= 4)) & 0xF];
	} while (bit > 0);
	return (p - s);
}

/* True if wc lies in the high (leading) surrogate range 0xD800-0xDBFF. */
static __inline bool
is_hi_surrogate(wchar_t wc)
{

	return (wc >= 0xD800 && wc <= 0xDBFF);
}

/* True if wc lies in the low (trailing) surrogate range 0xDC00-0xDFFF. */
static __inline bool
is_lo_surrogate(wchar_t wc)
{

	return (wc >= 0xDC00 && wc <= 0xDFFF);
}

/*
 * Combine a high/low surrogate pair into a single value: 10 bits from
 * each half, offset by 0x10000.  The arguments are not range-checked
 * here.
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{

	hi -= 0xD800;
	lo -= 0xDC00;
	return ((hi << 10 | lo) + 0x10000);
}

/*
 * Inverse of surrogate_to_ucs(): split wc into a high and a low
 * surrogate, stored through hi and lo.  wc is not range-checked here.
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{

	wc -= 0x10000;
	*hi = (wc >> 10) + 0xD800;
	*lo = (wc & 0x3FF) + 0xDC00;
}

/*
 * True if wc is at most 0x9F and is none of 0x24, 0x40, 0x60.
 * In MODE_C99 such values are written as a single raw byte and are
 * rejected when they arrive as an escape.
 * NOTE(review): the excluded values look like the C99 rule on which
 * characters a universal character name may designate -- confirm.
 */
static __inline bool
is_basic(wchar_t wc)
{

	return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 &&
	    wc != 0x60);
}

/*
 * Decode one wide character from the input.
 *
 * ei      - encoding configuration (only ei->mode is read here).
 * pwc     - receives the decoded character; may be NULL.
 * s       - in: pointer to the input bytes; out: advanced past the bytes
 *           taken from the input during this call.  If *s is NULL the
 *           state is reset, *nresult is set to 0 and 0 is returned.
 * n       - number of input bytes available at *s.
 * psenc   - conversion state; input bytes are copied into psenc->ch one
 *           at a time, only when the parser needs the next byte.
 * nresult - receives 0 if the decoded character is 0, (size_t)-2 if the
 *           input ran out before a character could be resolved,
 *           (size_t)-1 on EILSEQ, otherwise the number of bytes taken
 *           from *s during this call.
 *
 * Returns 0, or EILSEQ for an escape whose value is not acceptable.  On
 * the EILSEQ path neither *s nor psenc->chlen is updated.
 *
 * If the buffered bytes do not form an acceptable escape, the first
 * buffered byte is returned as the character and the remaining bytes stay
 * in psenc->ch, to be re-parsed by subsequent calls before any further
 * input is read.
 */
static int
_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
    _UESState * __restrict psenc, size_t * __restrict nresult)
{
	char *s0;
	int ch, head, num, tail;
	wchar_t hi, wc;

	if (*s == NULL) {
		_citrus_UES_init_state(ei, psenc);
		*nresult = 0;
		return (0);
	}
	s0 = *s;

	/*
	 * hi holds a pending high surrogate; tail is the index in
	 * psenc->ch one past the escape currently being parsed (0 means
	 * "not an escape").
	 */
	hi = (wchar_t)0;
	tail = 0;

surrogate:
	/*
	 * Entered once for the first escape (tail == 0) and re-entered with
	 * tail == 6 to parse the escape following a high surrogate.
	 */
	wc = (wchar_t)0;
	head = tail;
	if (psenc->chlen == head) {
		if (n-- < 1)
			goto restart;
		psenc->ch[psenc->chlen++] = *s0++;
	}
	ch = (unsigned char)psenc->ch[head++];
	if (ch == ESCAPE) {
		if (psenc->chlen == head) {
			if (n-- < 1)
				goto restart;
			psenc->ch[psenc->chlen++] = *s0++;
		}
		switch (psenc->ch[head]) {
		case UCS2_ESC:
			/* 6 for the first escape, 12 for the second one. */
			tail += 6;
			break;
		case UCS4_ESC:
			if (ei->mode & MODE_C99) {
				tail = 10;
				break;
			}
			/*FALLTHROUGH*/
		default:
			tail = 0;
		}
		++head;
	}
	/* Accumulate the hex digits of the escape, if any, into wc. */
	for (; head < tail; ++head) {
		if (psenc->chlen == head) {
			if (n-- < 1) {
restart:
				/*
				 * Out of input: keep what was buffered and
				 * report an incomplete character.
				 */
				*s = s0;
				*nresult = (size_t)-2;
				return (0);
			}
			psenc->ch[psenc->chlen++] = *s0++;
		}
		num = to_int((int)(unsigned char)psenc->ch[head]);
		if (num < 0) {
			/* Not a hex digit: this is not an escape after all. */
			tail = 0;
			break;
		}
		wc = (wc << 4) | num;
	}
	/* A jump to done from the switch below empties the buffer. */
	head = 0;
	switch (tail) {
	case 0:
		break;
	case 6:
		/*
		 * tail is still 6 with hi set when the byte after a high
		 * surrogate escape did not start another escape.
		 */
		if (hi != (wchar_t)0)
			break;
		if ((ei->mode & MODE_C99) == 0) {
			if (is_hi_surrogate(wc) != 0) {
				hi = wc;
				goto surrogate;
			}
			if ((uint32_t)wc <= 0x7F /* XXX */ ||
			    is_lo_surrogate(wc) != 0)
				break;
			goto done;
		}
		/*FALLTHROUGH*/
	case 10:
		if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
		    is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
			goto done;
		*nresult = (size_t)-1;
		return (EILSEQ);
	case 12:
		if (is_lo_surrogate(wc) == 0)
			break;
		wc = surrogate_to_ucs(hi, wc);
		goto done;
	}
	/*
	 * Fallback: return the first buffered byte as the character and
	 * shift the remaining buffered bytes down by one.
	 */
	ch = (unsigned char)psenc->ch[0];
	head = psenc->chlen;
	if (--head > 0)
		memmove(&psenc->ch[0], &psenc->ch[1], head);
	wc = (wchar_t)ch;
done:
	psenc->chlen = head;
	if (pwc != NULL)
		*pwc = wc;
	*nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
	*s = s0;

	return (0);
}

/*
 * Encode one wide character into s.
 *
 * ei      - encoding configuration (only ei->mode is read here).
 * s       - output buffer.
 * n       - number of bytes available at s.
 * wc      - character to encode.
 * psenc   - conversion state; must be empty (chlen == 0).  psenc->ch is
 *           used as scratch space and chlen is 0 again on success.
 * nresult - receives the number of bytes written, or (size_t)-1 on
 *           EILSEQ / E2BIG.  It is not written on the EINVAL path.
 *
 * Output form, chosen in this order:
 *   1 raw byte          is_basic(wc) in MODE_C99, else wc <= 0x7F
 *   6 bytes  (one 'u')  wc <= BMP_MAX
 *   12 bytes (two 'u')  no MODE_C99 and wc <= UCS2_MAX (surrogate pair)
 *   10 bytes (one 'U')  MODE_C99 and wc <= UCS4_MAX
 *
 * Returns 0 on success, EINVAL if the state is not empty, E2BIG if n is
 * too small for the chosen form, EILSEQ if no form applies.
 */
static int
_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
    char * __restrict s, size_t n, wchar_t wc,
    _UESState * __restrict psenc, size_t * __restrict nresult)
{
	wchar_t hi, lo;

	if (psenc->chlen != 0)
		return (EINVAL);

	if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
		if (n-- < 1)
			goto e2big;
		psenc->ch[psenc->chlen++] = (char)wc;
	} else if ((uint32_t)wc <= BMP_MAX) {
		if (n < 6)
			goto e2big;
		psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
	} else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
		if (n < 12)
			goto e2big;
		ucs_to_surrogate(wc, &hi, &lo);
		/* chlen is 0 here, so the two additions total 12. */
		psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
		psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
	} else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
		if (n < 10)
			goto e2big;
		psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
	} else {
		*nresult = (size_t)-1;
		return (EILSEQ);
	}
	/* Hand the assembled bytes to the caller and clear the scratch. */
	memcpy(s, psenc->ch, psenc->chlen);
	*nresult = psenc->chlen;
	psenc->chlen = 0;

	return (0);

e2big:
	*nresult = (size_t)-1;
	return (E2BIG);
}

/*
 * Map a wide character to a (charset id, index) pair: the charset id is
 * always 0 and the index is the character value itself.  Always returns 0.
 */
/*ARGSUSED*/
static int
_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused,
    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
{

	*csid = 0;
	*idx = (_index_t)wc;

	return (0);
}

/*
 * Map a (charset id, index) pair back to a wide character.  Only charset
 * id 0 is accepted; any other id returns EILSEQ and leaves *wc untouched.
 */
static __inline int
/*ARGSUSED*/
_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused,
    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
{

	if (csid != 0)
		return (EILSEQ);
	*wc = (wchar_t)idx;

	return (0);
}

/*
 * Report the state through *rstate: _STDENC_SDGEN_INITIAL when no bytes
 * are buffered, _STDENC_SDGEN_INCOMPLETE_CHAR otherwise.  Always
 * returns 0.
 */
static __inline int
/*ARGSUSED*/
_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc, int * __restrict rstate)
{

	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
	    _STDENC_SDGEN_INCOMPLETE_CHAR;
	return (0);
}

/*
 * Module teardown hook; there is nothing to release.
 */
static void
/*ARGSUSED*/
_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused)
{

	/* ei seems to be unused */
}

/*
 * Module setup hook: zero *ei, scan the lenvar bytes at var for options
 * and derive mb_cur_max from the resulting mode.
 *
 * The only option handled is C99, which sets MODE_C99; the scan is
 * driven by the upper-cased first letter, so it is case-insensitive at
 * least for that letter.  Unrecognised bytes are skipped.
 *
 * Always returns 0.
 */
static int
/*ARGSUSED*/
_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
    const void * __restrict var, size_t lenvar)
{
	const char *p;

	p = var;
	memset((void *)ei, 0, sizeof(*ei));
	while (lenvar > 0) {
		switch (_bcs_toupper(*p)) {
		case 'C':
			/*
			 * MATCH is not defined in this file.
			 * NOTE(review): if MATCH advances p and reduces
			 * lenvar past the matched keyword, the unconditional
			 * step below would wrap lenvar when the keyword ends
			 * exactly at the end of var -- confirm that lenvar
			 * covers a trailing separator or terminator.
			 */
			MATCH(C99, ei->mode |= MODE_C99);
			break;
		}
		++p;
		--lenvar;
	}
	/* Longest sequence: one 'U' escape (10) or a surrogate pair (12). */
	ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;

	return (0);
}

/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

_CITRUS_STDENC_DECLS(UES);
_CITRUS_STDENC_DEF_OPS(UES);

#include "citrus_stdenc_template.h"