1 /* $NetBSD: citrus_ues.c,v 1.3 2012/02/12 13:51:29 wiz Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2006 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 33 #include <assert.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <stdio.h> 37 #include <stdint.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 42 #include "citrus_namespace.h" 43 #include "citrus_types.h" 44 #include "citrus_bcs.h" 45 #include "citrus_module.h" 46 #include "citrus_stdenc.h" 47 #include "citrus_ues.h" 48 49 typedef struct { 50 size_t mb_cur_max; 51 int mode; 52 #define MODE_C99 1 53 } _UESEncodingInfo; 54 55 typedef struct { 56 int chlen; 57 char ch[12]; 58 } _UESState; 59 60 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 61 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 62 63 #define _FUNCNAME(m) _citrus_UES_##m 64 #define _ENCODING_INFO _UESEncodingInfo 65 #define _ENCODING_STATE _UESState 66 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 67 #define _ENCODING_IS_STATE_DEPENDENT 0 68 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 69 70 static __inline void 71 /*ARGSUSED*/ 72 _citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused, 73 _UESState * __restrict psenc) 74 { 75 76 psenc->chlen = 0; 77 } 78 79 #if 0 80 static __inline void 81 /*ARGSUSED*/ 82 _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused, 83 void *__restrict pspriv, const _UESState * __restrict psenc) 84 { 85 86 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 87 } 88 89 static __inline void 90 /*ARGSUSED*/ 91 _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused, 92 _UESState * __restrict psenc, const void * __restrict pspriv) 93 { 94 95 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 96 } 97 #endif 98 99 static __inline int 100 to_int(int ch) 101 { 102 103 if (ch >= '0' && ch <= '9') 104 return (ch - '0'); 105 else if (ch >= 'A' && ch <= 'F') 106 return ((ch - 'A') + 10); 107 else if (ch >= 'a' && ch <= 'f') 108 return ((ch - 'a') + 10); 109 return (-1); 110 } 111 112 #define ESCAPE '\\' 113 #define UCS2_ESC 'u' 114 #define UCS4_ESC 'U' 115 116 #define UCS2_BIT 16 117 #define UCS4_BIT 32 118 #define BMP_MAX UINT32_C(0xFFFF) 119 #define UCS2_MAX UINT32_C(0x10FFFF) 120 #define UCS4_MAX UINT32_C(0x7FFFFFFF) 121 122 static const char *xdig = "0123456789abcdef"; 123 124 static __inline int 125 to_str(char *s, wchar_t wc, int bit) 126 { 127 char *p; 128 129 p = s; 130 *p++ = ESCAPE; 131 switch (bit) { 132 case UCS2_BIT: 133 *p++ = UCS2_ESC; 134 break; 135 case UCS4_BIT: 136 *p++ = UCS4_ESC; 137 break; 138 default: 139 abort(); 140 } 141 do { 142 *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 143 } while (bit > 0); 144 return (p - s); 145 } 146 147 static __inline bool 148 is_hi_surrogate(wchar_t wc) 149 { 150 151 return (wc >= 0xD800 && wc <= 0xDBFF); 152 } 153 154 static __inline bool 155 is_lo_surrogate(wchar_t wc) 156 { 157 158 return (wc >= 0xDC00 && wc <= 0xDFFF); 159 } 160 161 static __inline wchar_t 162 surrogate_to_ucs(wchar_t hi, wchar_t lo) 163 { 164 165 hi -= 0xD800; 166 lo -= 0xDC00; 167 return ((hi << 10 | lo) + 0x10000); 168 } 169 170 static __inline void 171 ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 172 { 173 174 wc -= 0x10000; 175 *hi = (wc >> 10) + 0xD800; 176 *lo = (wc & 0x3FF) + 0xDC00; 177 } 178 179 static __inline bool 180 is_basic(wchar_t wc) 181 { 182 183 return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 && 184 wc != 0x60); 185 } 186 187 static int 188 _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 189 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 190 _UESState * __restrict psenc, size_t * __restrict nresult) 191 { 192 char *s0; 193 int ch, head, num, tail; 194 wchar_t hi, wc; 195 196 if (*s == NULL) { 197 _citrus_UES_init_state(ei, psenc); 198 *nresult = 0; 199 return (0); 200 } 201 s0 = *s; 202 203 hi = (wchar_t)0; 204 tail = 0; 205 206 surrogate: 207 wc = (wchar_t)0; 208 head = tail; 209 if (psenc->chlen == head) { 210 if (n-- < 1) 211 goto restart; 212 psenc->ch[psenc->chlen++] = *s0++; 213 } 214 ch = (unsigned char)psenc->ch[head++]; 215 if (ch == ESCAPE) { 216 if (psenc->chlen == head) { 217 if (n-- < 1) 218 goto restart; 219 psenc->ch[psenc->chlen++] = *s0++; 220 } 221 switch (psenc->ch[head]) { 222 case UCS2_ESC: 223 tail += 6; 224 break; 225 case UCS4_ESC: 226 if (ei->mode & MODE_C99) { 227 tail = 10; 228 break; 229 } 230 /*FALLTHROUGH*/ 231 default: 232 tail = 0; 233 } 234 ++head; 235 } 236 for (; head < tail; ++head) { 237 if (psenc->chlen == head) { 238 if (n-- < 1) { 239 restart: 240 *s = s0; 241 *nresult = (size_t)-2; 242 return (0); 243 } 244 psenc->ch[psenc->chlen++] = *s0++; 245 } 246 num = to_int((int)(unsigned char)psenc->ch[head]); 247 if (num < 0) { 248 tail = 0; 249 break; 250 } 251 wc = (wc << 4) | num; 252 } 253 head = 0; 254 switch (tail) { 255 case 0: 256 break; 257 case 6: 258 if (hi != (wchar_t)0) 259 break; 260 if ((ei->mode & MODE_C99) == 0) { 261 if (is_hi_surrogate(wc) != 0) { 262 hi = wc; 263 goto surrogate; 264 } 265 if ((uint32_t)wc <= 0x7F /* XXX */ || 266 is_lo_surrogate(wc) != 0) 267 break; 268 goto done; 269 } 270 /*FALLTHROUGH*/ 271 case 10: 272 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 273 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 274 goto done; 275 *nresult = (size_t)-1; 276 return (EILSEQ); 277 case 12: 278 if (is_lo_surrogate(wc) == 0) 279 break; 280 wc = surrogate_to_ucs(hi, wc); 281 goto done; 282 } 283 ch = (unsigned char)psenc->ch[0]; 284 head = psenc->chlen; 285 if (--head > 0) 286 memmove(&psenc->ch[0], &psenc->ch[1], head); 287 wc = (wchar_t)ch; 288 done: 289 psenc->chlen = head; 290 if (pwc != NULL) 291 *pwc = wc; 292 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 293 *s = s0; 294 295 return (0); 296 } 297 298 static int 299 _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 300 char * __restrict s, size_t n, wchar_t wc, 301 _UESState * __restrict psenc, size_t * __restrict nresult) 302 { 303 wchar_t hi, lo; 304 305 if (psenc->chlen != 0) 306 return (EINVAL); 307 308 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 309 if (n-- < 1) 310 goto e2big; 311 psenc->ch[psenc->chlen++] = (char)wc; 312 } else if ((uint32_t)wc <= BMP_MAX) { 313 if (n < 6) 314 goto e2big; 315 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 316 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 317 if (n < 12) 318 goto e2big; 319 ucs_to_surrogate(wc, &hi, &lo); 320 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 321 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 322 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 323 if (n < 10) 324 goto e2big; 325 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 326 } else { 327 *nresult = (size_t)-1; 328 return (EILSEQ); 329 } 330 memcpy(s, psenc->ch, psenc->chlen); 331 *nresult = psenc->chlen; 332 psenc->chlen = 0; 333 334 return (0); 335 336 e2big: 337 *nresult = (size_t)-1; 338 return (E2BIG); 339 } 340 341 /*ARGSUSED*/ 342 static int 343 _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused, 344 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 345 { 346 347 *csid = 0; 348 *idx = (_index_t)wc; 349 350 return (0); 351 } 352 353 static __inline int 354 /*ARGSUSED*/ 355 _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused, 356 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 357 { 358 359 if (csid != 0) 360 return (EILSEQ); 361 *wc = (wchar_t)idx; 362 363 return (0); 364 } 365 366 static __inline int 367 /*ARGSUSED*/ 368 _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused, 369 _UESState * __restrict psenc, int * __restrict rstate) 370 { 371 372 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 373 _STDENC_SDGEN_INCOMPLETE_CHAR; 374 return (0); 375 } 376 377 static void 378 /*ARGSUSED*/ 379 _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused) 380 { 381 382 /* ei seems to be unused */ 383 } 384 385 static int 386 /*ARGSUSED*/ 387 _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 388 const void * __restrict var, size_t lenvar) 389 { 390 const char *p; 391 392 p = var; 393 memset((void *)ei, 0, sizeof(*ei)); 394 while (lenvar > 0) { 395 switch (_bcs_toupper(*p)) { 396 case 'C': 397 MATCH(C99, ei->mode |= MODE_C99); 398 break; 399 } 400 ++p; 401 --lenvar; 402 } 403 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 404 405 return (0); 406 } 407 408 /* ---------------------------------------------------------------------- 409 * public interface for stdenc 410 */ 411 412 _CITRUS_STDENC_DECLS(UES); 413 _CITRUS_STDENC_DEF_OPS(UES); 414 415 #include "citrus_stdenc_template.h" 416