1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_ues.c,v 1.3 2012/02/12 13:51:29 wiz Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2006 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdio.h> 38 #include <stdint.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <wchar.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_bcs.h" 46 #include "citrus_module.h" 47 #include "citrus_stdenc.h" 48 #include "citrus_ues.h" 49 50 typedef struct { 51 size_t mb_cur_max; 52 int mode; 53 #define MODE_C99 1 54 } _UESEncodingInfo; 55 56 typedef struct { 57 int chlen; 58 char ch[12]; 59 } _UESState; 60 61 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 62 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 63 64 #define _FUNCNAME(m) _citrus_UES_##m 65 #define _ENCODING_INFO _UESEncodingInfo 66 #define _ENCODING_STATE _UESState 67 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 68 #define _ENCODING_IS_STATE_DEPENDENT 0 69 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 70 71 static __inline void 72 /*ARGSUSED*/ 73 _citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused, 74 _UESState * __restrict psenc) 75 { 76 77 psenc->chlen = 0; 78 } 79 80 #if 0 81 static __inline void 82 /*ARGSUSED*/ 83 _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused, 84 void *__restrict pspriv, const _UESState * __restrict psenc) 85 { 86 87 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 88 } 89 90 static __inline void 91 /*ARGSUSED*/ 92 _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused, 93 _UESState * __restrict psenc, const void * __restrict pspriv) 94 { 95 96 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 97 } 98 #endif 99 100 static __inline int 101 to_int(int ch) 102 { 103 104 if (ch >= '0' && ch <= '9') 105 return (ch - '0'); 106 else if (ch >= 'A' && ch <= 'F') 107 return ((ch - 'A') + 10); 108 else if (ch >= 'a' && ch <= 'f') 109 return ((ch - 'a') + 10); 110 return (-1); 111 } 112 113 #define ESCAPE '\\' 114 #define UCS2_ESC 'u' 115 #define UCS4_ESC 'U' 116 117 #define UCS2_BIT 16 118 #define UCS4_BIT 32 119 #define BMP_MAX UINT32_C(0xFFFF) 120 #define UCS2_MAX UINT32_C(0x10FFFF) 121 #define UCS4_MAX UINT32_C(0x7FFFFFFF) 122 123 static const char *xdig = "0123456789abcdef"; 124 125 static __inline int 126 to_str(char *s, wchar_t wc, int bit) 127 { 128 char *p; 129 130 p = s; 131 *p++ = ESCAPE; 132 switch (bit) { 133 case UCS2_BIT: 134 *p++ = UCS2_ESC; 135 break; 136 case UCS4_BIT: 137 *p++ = UCS4_ESC; 138 break; 139 default: 140 abort(); 141 } 142 do { 143 *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 144 } while (bit > 0); 145 return (p - s); 146 } 147 148 static __inline bool 149 is_hi_surrogate(wchar_t wc) 150 { 151 152 return (wc >= 0xD800 && wc <= 0xDBFF); 153 } 154 155 static __inline bool 156 is_lo_surrogate(wchar_t wc) 157 { 158 159 return (wc >= 0xDC00 && wc <= 0xDFFF); 160 } 161 162 static __inline wchar_t 163 surrogate_to_ucs(wchar_t hi, wchar_t lo) 164 { 165 166 hi -= 0xD800; 167 lo -= 0xDC00; 168 return ((hi << 10 | lo) + 0x10000); 169 } 170 171 static __inline void 172 ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 173 { 174 175 wc -= 0x10000; 176 *hi = (wc >> 10) + 0xD800; 177 *lo = (wc & 0x3FF) + 0xDC00; 178 } 179 180 static __inline bool 181 is_basic(wchar_t wc) 182 { 183 184 return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 && 185 wc != 0x60); 186 } 187 188 static int 189 _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 190 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 191 _UESState * __restrict psenc, size_t * __restrict nresult) 192 { 193 char *s0; 194 int ch, head, num, tail; 195 wchar_t hi, wc; 196 197 if (*s == NULL) { 198 _citrus_UES_init_state(ei, psenc); 199 *nresult = 0; 200 return (0); 201 } 202 s0 = *s; 203 204 hi = (wchar_t)0; 205 tail = 0; 206 207 surrogate: 208 wc = (wchar_t)0; 209 head = tail; 210 if (psenc->chlen == head) { 211 if (n-- < 1) 212 goto restart; 213 psenc->ch[psenc->chlen++] = *s0++; 214 } 215 ch = (unsigned char)psenc->ch[head++]; 216 if (ch == ESCAPE) { 217 if (psenc->chlen == head) { 218 if (n-- < 1) 219 goto restart; 220 psenc->ch[psenc->chlen++] = *s0++; 221 } 222 switch (psenc->ch[head]) { 223 case UCS2_ESC: 224 tail += 6; 225 break; 226 case UCS4_ESC: 227 if (ei->mode & MODE_C99) { 228 tail = 10; 229 break; 230 } 231 /*FALLTHROUGH*/ 232 default: 233 tail = 0; 234 } 235 ++head; 236 } 237 for (; head < tail; ++head) { 238 if (psenc->chlen == head) { 239 if (n-- < 1) { 240 restart: 241 *s = s0; 242 *nresult = (size_t)-2; 243 return (0); 244 } 245 psenc->ch[psenc->chlen++] = *s0++; 246 } 247 num = to_int((int)(unsigned char)psenc->ch[head]); 248 if (num < 0) { 249 tail = 0; 250 break; 251 } 252 wc = (wc << 4) | num; 253 } 254 head = 0; 255 switch (tail) { 256 case 0: 257 break; 258 case 6: 259 if (hi != (wchar_t)0) 260 break; 261 if ((ei->mode & MODE_C99) == 0) { 262 if (is_hi_surrogate(wc) != 0) { 263 hi = wc; 264 goto surrogate; 265 } 266 if ((uint32_t)wc <= 0x7F /* XXX */ || 267 is_lo_surrogate(wc) != 0) 268 break; 269 goto done; 270 } 271 /*FALLTHROUGH*/ 272 case 10: 273 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 274 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 275 goto done; 276 *nresult = (size_t)-1; 277 return (EILSEQ); 278 case 12: 279 if (is_lo_surrogate(wc) == 0) 280 break; 281 wc = surrogate_to_ucs(hi, wc); 282 goto done; 283 } 284 ch = (unsigned char)psenc->ch[0]; 285 head = psenc->chlen; 286 if (--head > 0) 287 memmove(&psenc->ch[0], &psenc->ch[1], head); 288 wc = (wchar_t)ch; 289 done: 290 psenc->chlen = head; 291 if (pwc != NULL) 292 *pwc = wc; 293 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 294 *s = s0; 295 296 return (0); 297 } 298 299 static int 300 _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 301 char * __restrict s, size_t n, wchar_t wc, 302 _UESState * __restrict psenc, size_t * __restrict nresult) 303 { 304 wchar_t hi, lo; 305 306 if (psenc->chlen != 0) 307 return (EINVAL); 308 309 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 310 if (n-- < 1) 311 goto e2big; 312 psenc->ch[psenc->chlen++] = (char)wc; 313 } else if ((uint32_t)wc <= BMP_MAX) { 314 if (n < 6) 315 goto e2big; 316 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 317 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 318 if (n < 12) 319 goto e2big; 320 ucs_to_surrogate(wc, &hi, &lo); 321 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 322 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 323 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 324 if (n < 10) 325 goto e2big; 326 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 327 } else { 328 *nresult = (size_t)-1; 329 return (EILSEQ); 330 } 331 memcpy(s, psenc->ch, psenc->chlen); 332 *nresult = psenc->chlen; 333 psenc->chlen = 0; 334 335 return (0); 336 337 e2big: 338 *nresult = (size_t)-1; 339 return (E2BIG); 340 } 341 342 /*ARGSUSED*/ 343 static int 344 _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused, 345 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 346 { 347 348 *csid = 0; 349 *idx = (_index_t)wc; 350 351 return (0); 352 } 353 354 static __inline int 355 /*ARGSUSED*/ 356 _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused, 357 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 358 { 359 360 if (csid != 0) 361 return (EILSEQ); 362 *wc = (wchar_t)idx; 363 364 return (0); 365 } 366 367 static __inline int 368 /*ARGSUSED*/ 369 _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused, 370 _UESState * __restrict psenc, int * __restrict rstate) 371 { 372 373 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 374 _STDENC_SDGEN_INCOMPLETE_CHAR; 375 return (0); 376 } 377 378 static void 379 /*ARGSUSED*/ 380 _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused) 381 { 382 383 /* ei seems to be unused */ 384 } 385 386 static int 387 /*ARGSUSED*/ 388 _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 389 const void * __restrict var, size_t lenvar) 390 { 391 const char *p; 392 393 p = var; 394 memset((void *)ei, 0, sizeof(*ei)); 395 while (lenvar > 0) { 396 switch (_bcs_toupper(*p)) { 397 case 'C': 398 MATCH(C99, ei->mode |= MODE_C99); 399 break; 400 } 401 ++p; 402 --lenvar; 403 } 404 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 405 406 return (0); 407 } 408 409 /* ---------------------------------------------------------------------- 410 * public interface for stdenc 411 */ 412 413 _CITRUS_STDENC_DECLS(UES); 414 _CITRUS_STDENC_DEF_OPS(UES); 415 416 #include "citrus_stdenc_template.h" 417