1ad30f8e7SGabor Kovesdan /* $FreeBSD$ */ 2ad30f8e7SGabor Kovesdan /* $NetBSD: citrus_utf1632.c,v 1.9 2008/06/14 16:01:08 tnozaki Exp $ */ 3ad30f8e7SGabor Kovesdan 4ad30f8e7SGabor Kovesdan /*- 5*5e53a4f9SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause 6*5e53a4f9SPedro F. Giffuni * 7ad30f8e7SGabor Kovesdan * Copyright (c)2003 Citrus Project, 8ad30f8e7SGabor Kovesdan * All rights reserved. 9ad30f8e7SGabor Kovesdan * 10ad30f8e7SGabor Kovesdan * Redistribution and use in source and binary forms, with or without 11ad30f8e7SGabor Kovesdan * modification, are permitted provided that the following conditions 12ad30f8e7SGabor Kovesdan * are met: 13ad30f8e7SGabor Kovesdan * 1. Redistributions of source code must retain the above copyright 14ad30f8e7SGabor Kovesdan * notice, this list of conditions and the following disclaimer. 15ad30f8e7SGabor Kovesdan * 2. Redistributions in binary form must reproduce the above copyright 16ad30f8e7SGabor Kovesdan * notice, this list of conditions and the following disclaimer in the 17ad30f8e7SGabor Kovesdan * documentation and/or other materials provided with the distribution. 18ad30f8e7SGabor Kovesdan * 19ad30f8e7SGabor Kovesdan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20ad30f8e7SGabor Kovesdan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21ad30f8e7SGabor Kovesdan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22ad30f8e7SGabor Kovesdan * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23ad30f8e7SGabor Kovesdan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24ad30f8e7SGabor Kovesdan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25ad30f8e7SGabor Kovesdan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26ad30f8e7SGabor Kovesdan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27ad30f8e7SGabor Kovesdan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28ad30f8e7SGabor Kovesdan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29ad30f8e7SGabor Kovesdan * SUCH DAMAGE. 30ad30f8e7SGabor Kovesdan */ 31ad30f8e7SGabor Kovesdan 32ad30f8e7SGabor Kovesdan #include <sys/cdefs.h> 33ad30f8e7SGabor Kovesdan #include <sys/endian.h> 34ad30f8e7SGabor Kovesdan #include <sys/types.h> 35ad30f8e7SGabor Kovesdan 36ad30f8e7SGabor Kovesdan #include <assert.h> 37ad30f8e7SGabor Kovesdan #include <errno.h> 38ad30f8e7SGabor Kovesdan #include <limits.h> 39ad30f8e7SGabor Kovesdan #include <stddef.h> 40ad30f8e7SGabor Kovesdan #include <stdio.h> 41ad30f8e7SGabor Kovesdan #include <stdlib.h> 42ad30f8e7SGabor Kovesdan #include <string.h> 43ad30f8e7SGabor Kovesdan #include <wchar.h> 44ad30f8e7SGabor Kovesdan 45ad30f8e7SGabor Kovesdan #include "citrus_namespace.h" 46ad30f8e7SGabor Kovesdan #include "citrus_types.h" 47ad30f8e7SGabor Kovesdan #include "citrus_module.h" 48ad30f8e7SGabor Kovesdan #include "citrus_stdenc.h" 49ad30f8e7SGabor Kovesdan #include "citrus_bcs.h" 50ad30f8e7SGabor Kovesdan 51ad30f8e7SGabor Kovesdan #include "citrus_utf1632.h" 52ad30f8e7SGabor Kovesdan 53ad30f8e7SGabor Kovesdan 54ad30f8e7SGabor Kovesdan /* ---------------------------------------------------------------------- 55ad30f8e7SGabor Kovesdan * private stuffs used by templates 56ad30f8e7SGabor Kovesdan */ 57ad30f8e7SGabor Kovesdan 58ad30f8e7SGabor Kovesdan typedef struct { 59ad30f8e7SGabor Kovesdan int chlen; 60ad30f8e7SGabor Kovesdan int current_endian; 61ad30f8e7SGabor Kovesdan uint8_t ch[4]; 62ad30f8e7SGabor Kovesdan } _UTF1632State; 63ad30f8e7SGabor Kovesdan 64ad30f8e7SGabor Kovesdan #define _ENDIAN_UNKNOWN 0 65ad30f8e7SGabor Kovesdan #define _ENDIAN_BIG 1 66ad30f8e7SGabor Kovesdan #define _ENDIAN_LITTLE 2 67ad30f8e7SGabor Kovesdan #if BYTE_ORDER == BIG_ENDIAN 68ad30f8e7SGabor Kovesdan #define _ENDIAN_INTERNAL _ENDIAN_BIG 69ad30f8e7SGabor Kovesdan #define _ENDIAN_SWAPPED _ENDIAN_LITTLE 70ad30f8e7SGabor Kovesdan #else 71ad30f8e7SGabor Kovesdan #define _ENDIAN_INTERNAL _ENDIAN_LITTLE 72ad30f8e7SGabor Kovesdan #define _ENDIAN_SWAPPED _ENDIAN_BIG 73ad30f8e7SGabor Kovesdan #endif 74ad30f8e7SGabor Kovesdan #define _MODE_UTF32 0x00000001U 75ad30f8e7SGabor Kovesdan #define _MODE_FORCE_ENDIAN 0x00000002U 76ad30f8e7SGabor Kovesdan 77ad30f8e7SGabor Kovesdan typedef struct { 78ad30f8e7SGabor Kovesdan int preffered_endian; 79ad30f8e7SGabor Kovesdan unsigned int cur_max; 80ad30f8e7SGabor Kovesdan uint32_t mode; 81ad30f8e7SGabor Kovesdan } _UTF1632EncodingInfo; 82ad30f8e7SGabor Kovesdan 83ad30f8e7SGabor Kovesdan #define _FUNCNAME(m) _citrus_UTF1632_##m 84ad30f8e7SGabor Kovesdan #define _ENCODING_INFO _UTF1632EncodingInfo 85ad30f8e7SGabor Kovesdan #define _ENCODING_STATE _UTF1632State 86ad30f8e7SGabor Kovesdan #define _ENCODING_MB_CUR_MAX(_ei_) ((_ei_)->cur_max) 87ad30f8e7SGabor Kovesdan #define _ENCODING_IS_STATE_DEPENDENT 0 88ad30f8e7SGabor Kovesdan #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 89ad30f8e7SGabor Kovesdan 90ad30f8e7SGabor Kovesdan 91ad30f8e7SGabor Kovesdan static __inline void 92ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 93ad30f8e7SGabor Kovesdan _citrus_UTF1632_init_state(_UTF1632EncodingInfo *ei __unused, 94ad30f8e7SGabor Kovesdan _UTF1632State *s) 95ad30f8e7SGabor Kovesdan { 96ad30f8e7SGabor Kovesdan 97ad30f8e7SGabor Kovesdan memset(s, 0, sizeof(*s)); 98ad30f8e7SGabor Kovesdan } 99ad30f8e7SGabor Kovesdan 100ad30f8e7SGabor Kovesdan static int 101ad30f8e7SGabor Kovesdan _citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo *ei, wchar_t *pwc, 1021243a98eSTijl Coosemans char **s, size_t n, _UTF1632State *psenc, size_t *nresult) 103ad30f8e7SGabor Kovesdan { 1041243a98eSTijl Coosemans char *s0; 105ad30f8e7SGabor Kovesdan size_t result; 106ad30f8e7SGabor Kovesdan wchar_t wc = L'\0'; 107ad30f8e7SGabor Kovesdan int chlenbak, endian, needlen; 108ad30f8e7SGabor Kovesdan 109ad30f8e7SGabor Kovesdan s0 = *s; 110ad30f8e7SGabor Kovesdan 111ad30f8e7SGabor Kovesdan if (s0 == NULL) { 112ad30f8e7SGabor Kovesdan _citrus_UTF1632_init_state(ei, psenc); 113ad30f8e7SGabor Kovesdan *nresult = 0; /* state independent */ 114ad30f8e7SGabor Kovesdan return (0); 115ad30f8e7SGabor Kovesdan } 116ad30f8e7SGabor Kovesdan 117ad30f8e7SGabor Kovesdan result = 0; 118ad30f8e7SGabor Kovesdan chlenbak = psenc->chlen; 119ad30f8e7SGabor Kovesdan 120ad30f8e7SGabor Kovesdan refetch: 121ad30f8e7SGabor Kovesdan needlen = ((ei->mode & _MODE_UTF32) != 0 || chlenbak >= 2) ? 4 : 2; 122ad30f8e7SGabor Kovesdan 123ad30f8e7SGabor Kovesdan while (chlenbak < needlen) { 124ad30f8e7SGabor Kovesdan if (n == 0) 125ad30f8e7SGabor Kovesdan goto restart; 126ad30f8e7SGabor Kovesdan psenc->ch[chlenbak++] = *s0++; 127ad30f8e7SGabor Kovesdan n--; 128ad30f8e7SGabor Kovesdan result++; 129ad30f8e7SGabor Kovesdan } 130ad30f8e7SGabor Kovesdan 131ad30f8e7SGabor Kovesdan /* judge endian marker */ 132ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 133ad30f8e7SGabor Kovesdan /* UTF16 */ 134ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0xFE && psenc->ch[1] == 0xFF) { 135ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 136ad30f8e7SGabor Kovesdan chlenbak = 0; 137ad30f8e7SGabor Kovesdan goto refetch; 138ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE) { 139ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 140ad30f8e7SGabor Kovesdan chlenbak = 0; 141ad30f8e7SGabor Kovesdan goto refetch; 142ad30f8e7SGabor Kovesdan } 143ad30f8e7SGabor Kovesdan } else { 144ad30f8e7SGabor Kovesdan /* UTF32 */ 145ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0x00 && psenc->ch[1] == 0x00 && 146ad30f8e7SGabor Kovesdan psenc->ch[2] == 0xFE && psenc->ch[3] == 0xFF) { 147ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 148ad30f8e7SGabor Kovesdan chlenbak = 0; 149ad30f8e7SGabor Kovesdan goto refetch; 150ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE && 151ad30f8e7SGabor Kovesdan psenc->ch[2] == 0x00 && psenc->ch[3] == 0x00) { 152ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 153ad30f8e7SGabor Kovesdan chlenbak = 0; 154ad30f8e7SGabor Kovesdan goto refetch; 155ad30f8e7SGabor Kovesdan } 156ad30f8e7SGabor Kovesdan } 157ad30f8e7SGabor Kovesdan endian = ((ei->mode & _MODE_FORCE_ENDIAN) != 0 || 158ad30f8e7SGabor Kovesdan psenc->current_endian == _ENDIAN_UNKNOWN) ? ei->preffered_endian : 159ad30f8e7SGabor Kovesdan psenc->current_endian; 160ad30f8e7SGabor Kovesdan 161ad30f8e7SGabor Kovesdan /* get wc */ 162ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 163ad30f8e7SGabor Kovesdan /* UTF16 */ 164ad30f8e7SGabor Kovesdan if (needlen == 2) { 165ad30f8e7SGabor Kovesdan switch (endian) { 166ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 167ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 168ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8)); 169ad30f8e7SGabor Kovesdan break; 170ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 171ad30f8e7SGabor Kovesdan wc = (psenc->ch[1] | 172ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 8)); 173ad30f8e7SGabor Kovesdan break; 174ad30f8e7SGabor Kovesdan default: 175ad30f8e7SGabor Kovesdan goto ilseq; 176ad30f8e7SGabor Kovesdan } 177ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDBFF) { 178ad30f8e7SGabor Kovesdan /* surrogate high */ 179ad30f8e7SGabor Kovesdan needlen = 4; 180ad30f8e7SGabor Kovesdan goto refetch; 181ad30f8e7SGabor Kovesdan } 182ad30f8e7SGabor Kovesdan } else { 183ad30f8e7SGabor Kovesdan /* surrogate low */ 184ad30f8e7SGabor Kovesdan wc -= 0xD800; /* wc : surrogate high (see above) */ 185ad30f8e7SGabor Kovesdan wc <<= 10; 186ad30f8e7SGabor Kovesdan switch (endian) { 187ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 188ad30f8e7SGabor Kovesdan if (psenc->ch[3] < 0xDC || psenc->ch[3] > 0xDF) 189ad30f8e7SGabor Kovesdan goto ilseq; 190ad30f8e7SGabor Kovesdan wc |= psenc->ch[2]; 191ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[3] & 3) << 8; 192ad30f8e7SGabor Kovesdan break; 193ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 194ad30f8e7SGabor Kovesdan if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF) 195ad30f8e7SGabor Kovesdan goto ilseq; 196ad30f8e7SGabor Kovesdan wc |= psenc->ch[3]; 197ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[2] & 3) << 8; 198ad30f8e7SGabor Kovesdan break; 199ad30f8e7SGabor Kovesdan default: 200ad30f8e7SGabor Kovesdan goto ilseq; 201ad30f8e7SGabor Kovesdan } 202ad30f8e7SGabor Kovesdan wc += 0x10000; 203ad30f8e7SGabor Kovesdan } 204ad30f8e7SGabor Kovesdan } else { 205ad30f8e7SGabor Kovesdan /* UTF32 */ 206ad30f8e7SGabor Kovesdan switch (endian) { 207ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 208ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 209ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8) | 210ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 16) | 211ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[3] << 24)); 212ad30f8e7SGabor Kovesdan break; 213ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 214ad30f8e7SGabor Kovesdan wc = (psenc->ch[3] | 215ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 8) | 216ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 16) | 217ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 24)); 218ad30f8e7SGabor Kovesdan break; 219ad30f8e7SGabor Kovesdan default: 220ad30f8e7SGabor Kovesdan goto ilseq; 221ad30f8e7SGabor Kovesdan } 222ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 223ad30f8e7SGabor Kovesdan goto ilseq; 224ad30f8e7SGabor Kovesdan } 225ad30f8e7SGabor Kovesdan 226ad30f8e7SGabor Kovesdan 227ad30f8e7SGabor Kovesdan *pwc = wc; 228ad30f8e7SGabor Kovesdan psenc->chlen = 0; 229ad30f8e7SGabor Kovesdan *nresult = result; 230ad30f8e7SGabor Kovesdan *s = s0; 231ad30f8e7SGabor Kovesdan 232ad30f8e7SGabor Kovesdan return (0); 233ad30f8e7SGabor Kovesdan 234ad30f8e7SGabor Kovesdan ilseq: 235ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 236ad30f8e7SGabor Kovesdan psenc->chlen = 0; 237ad30f8e7SGabor Kovesdan return (EILSEQ); 238ad30f8e7SGabor Kovesdan 239ad30f8e7SGabor Kovesdan restart: 240ad30f8e7SGabor Kovesdan *nresult = (size_t)-2; 241ad30f8e7SGabor Kovesdan psenc->chlen = chlenbak; 242ad30f8e7SGabor Kovesdan *s = s0; 243ad30f8e7SGabor Kovesdan return (0); 244ad30f8e7SGabor Kovesdan } 245ad30f8e7SGabor Kovesdan 246ad30f8e7SGabor Kovesdan static int 247ad30f8e7SGabor Kovesdan _citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo *ei, char *s, size_t n, 248ad30f8e7SGabor Kovesdan wchar_t wc, _UTF1632State *psenc, size_t *nresult) 249ad30f8e7SGabor Kovesdan { 250ad30f8e7SGabor Kovesdan wchar_t wc2; 251ad30f8e7SGabor Kovesdan static const char _bom[4] = { 252ad30f8e7SGabor Kovesdan 0x00, 0x00, 0xFE, 0xFF, 253ad30f8e7SGabor Kovesdan }; 254ad30f8e7SGabor Kovesdan const char *bom = &_bom[0]; 255ad30f8e7SGabor Kovesdan size_t cnt; 256ad30f8e7SGabor Kovesdan 257ad30f8e7SGabor Kovesdan cnt = (size_t)0; 258ad30f8e7SGabor Kovesdan if (psenc->current_endian == _ENDIAN_UNKNOWN) { 259ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) { 260ad30f8e7SGabor Kovesdan if (ei->mode & _MODE_UTF32) 261ad30f8e7SGabor Kovesdan cnt = 4; 262ad30f8e7SGabor Kovesdan else { 263ad30f8e7SGabor Kovesdan cnt = 2; 264ad30f8e7SGabor Kovesdan bom += 2; 265ad30f8e7SGabor Kovesdan } 266ad30f8e7SGabor Kovesdan if (n < cnt) 267ad30f8e7SGabor Kovesdan goto e2big; 268ad30f8e7SGabor Kovesdan memcpy(s, bom, cnt); 269ad30f8e7SGabor Kovesdan s += cnt, n -= cnt; 270ad30f8e7SGabor Kovesdan } 271ad30f8e7SGabor Kovesdan psenc->current_endian = ei->preffered_endian; 272ad30f8e7SGabor Kovesdan } 273ad30f8e7SGabor Kovesdan 274ad30f8e7SGabor Kovesdan wc2 = 0; 275ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32)==0) { 276ad30f8e7SGabor Kovesdan /* UTF16 */ 277ad30f8e7SGabor Kovesdan if (wc > 0xFFFF) { 278ad30f8e7SGabor Kovesdan /* surrogate */ 279ad30f8e7SGabor Kovesdan if (wc > 0x10FFFF) 280ad30f8e7SGabor Kovesdan goto ilseq; 281ad30f8e7SGabor Kovesdan if (n < 4) 282ad30f8e7SGabor Kovesdan goto e2big; 283ad30f8e7SGabor Kovesdan cnt += 4; 284ad30f8e7SGabor Kovesdan wc -= 0x10000; 285ad30f8e7SGabor Kovesdan wc2 = (wc & 0x3FF) | 0xDC00; 286ad30f8e7SGabor Kovesdan wc = (wc>>10) | 0xD800; 287ad30f8e7SGabor Kovesdan } else { 288ad30f8e7SGabor Kovesdan if (n < 2) 289ad30f8e7SGabor Kovesdan goto e2big; 290ad30f8e7SGabor Kovesdan cnt += 2; 291ad30f8e7SGabor Kovesdan } 292ad30f8e7SGabor Kovesdan 293ad30f8e7SGabor Kovesdan surrogate: 294ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 295ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 296ad30f8e7SGabor Kovesdan s[1] = wc; 297ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 298ad30f8e7SGabor Kovesdan break; 299ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 300ad30f8e7SGabor Kovesdan s[0] = wc; 301ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 302ad30f8e7SGabor Kovesdan break; 303ad30f8e7SGabor Kovesdan } 304ad30f8e7SGabor Kovesdan if (wc2 != 0) { 305ad30f8e7SGabor Kovesdan wc = wc2; 306ad30f8e7SGabor Kovesdan wc2 = 0; 307ad30f8e7SGabor Kovesdan s += 2; 308ad30f8e7SGabor Kovesdan goto surrogate; 309ad30f8e7SGabor Kovesdan } 310ad30f8e7SGabor Kovesdan } else { 311ad30f8e7SGabor Kovesdan /* UTF32 */ 312ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 313ad30f8e7SGabor Kovesdan goto ilseq; 314ad30f8e7SGabor Kovesdan if (n < 4) 315ad30f8e7SGabor Kovesdan goto e2big; 316ad30f8e7SGabor Kovesdan cnt += 4; 317ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 318ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 319ad30f8e7SGabor Kovesdan s[3] = wc; 320ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 321ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 322ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 323ad30f8e7SGabor Kovesdan break; 324ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 325ad30f8e7SGabor Kovesdan s[0] = wc; 326ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 327ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 328ad30f8e7SGabor Kovesdan s[3] = (wc >>= 8); 329ad30f8e7SGabor Kovesdan break; 330ad30f8e7SGabor Kovesdan } 331ad30f8e7SGabor Kovesdan } 332ad30f8e7SGabor Kovesdan *nresult = cnt; 333ad30f8e7SGabor Kovesdan 334ad30f8e7SGabor Kovesdan return (0); 335ad30f8e7SGabor Kovesdan 336ad30f8e7SGabor Kovesdan ilseq: 337ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 338ad30f8e7SGabor Kovesdan return (EILSEQ); 339ad30f8e7SGabor Kovesdan e2big: 340ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 341ad30f8e7SGabor Kovesdan return (E2BIG); 342ad30f8e7SGabor Kovesdan } 343ad30f8e7SGabor Kovesdan 344ad30f8e7SGabor Kovesdan static void 345ad30f8e7SGabor Kovesdan parse_variable(_UTF1632EncodingInfo * __restrict ei, 346ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 347ad30f8e7SGabor Kovesdan { 348ad30f8e7SGabor Kovesdan const char *p; 349ad30f8e7SGabor Kovesdan 350ad30f8e7SGabor Kovesdan p = var; 351ad30f8e7SGabor Kovesdan while (lenvar > 0) { 352ad30f8e7SGabor Kovesdan switch (*p) { 353ad30f8e7SGabor Kovesdan case 'B': 354ad30f8e7SGabor Kovesdan case 'b': 355ad30f8e7SGabor Kovesdan MATCH(big, ei->preffered_endian = _ENDIAN_BIG); 356ad30f8e7SGabor Kovesdan break; 357ad30f8e7SGabor Kovesdan case 'L': 358ad30f8e7SGabor Kovesdan case 'l': 359ad30f8e7SGabor Kovesdan MATCH(little, ei->preffered_endian = _ENDIAN_LITTLE); 360ad30f8e7SGabor Kovesdan break; 361ad30f8e7SGabor Kovesdan case 'i': 362ad30f8e7SGabor Kovesdan case 'I': 363ad30f8e7SGabor Kovesdan MATCH(internal, ei->preffered_endian = _ENDIAN_INTERNAL); 364ad30f8e7SGabor Kovesdan break; 365ad30f8e7SGabor Kovesdan case 's': 366ad30f8e7SGabor Kovesdan case 'S': 367ad30f8e7SGabor Kovesdan MATCH(swapped, ei->preffered_endian = _ENDIAN_SWAPPED); 368ad30f8e7SGabor Kovesdan break; 369ad30f8e7SGabor Kovesdan case 'F': 370ad30f8e7SGabor Kovesdan case 'f': 371ad30f8e7SGabor Kovesdan MATCH(force, ei->mode |= _MODE_FORCE_ENDIAN); 372ad30f8e7SGabor Kovesdan break; 373ad30f8e7SGabor Kovesdan case 'U': 374ad30f8e7SGabor Kovesdan case 'u': 375ad30f8e7SGabor Kovesdan MATCH(utf32, ei->mode |= _MODE_UTF32); 376ad30f8e7SGabor Kovesdan break; 377ad30f8e7SGabor Kovesdan } 378ad30f8e7SGabor Kovesdan p++; 379ad30f8e7SGabor Kovesdan lenvar--; 380ad30f8e7SGabor Kovesdan } 381ad30f8e7SGabor Kovesdan } 382ad30f8e7SGabor Kovesdan 383ad30f8e7SGabor Kovesdan static int 384ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 385ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei, 386ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 387ad30f8e7SGabor Kovesdan { 388ad30f8e7SGabor Kovesdan 389ad30f8e7SGabor Kovesdan memset((void *)ei, 0, sizeof(*ei)); 390ad30f8e7SGabor Kovesdan 391ad30f8e7SGabor Kovesdan parse_variable(ei, var, lenvar); 392ad30f8e7SGabor Kovesdan 393ad30f8e7SGabor Kovesdan ei->cur_max = ((ei->mode&_MODE_UTF32) == 0) ? 6 : 8; 394ad30f8e7SGabor Kovesdan /* 6: endian + surrogate */ 395ad30f8e7SGabor Kovesdan /* 8: endian + normal */ 396ad30f8e7SGabor Kovesdan 397ad30f8e7SGabor Kovesdan if (ei->preffered_endian == _ENDIAN_UNKNOWN) { 398ad30f8e7SGabor Kovesdan ei->preffered_endian = _ENDIAN_BIG; 399ad30f8e7SGabor Kovesdan } 400ad30f8e7SGabor Kovesdan 401ad30f8e7SGabor Kovesdan return (0); 402ad30f8e7SGabor Kovesdan } 403ad30f8e7SGabor Kovesdan 404ad30f8e7SGabor Kovesdan static void 405ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 406ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo *ei __unused) 407ad30f8e7SGabor Kovesdan { 408ad30f8e7SGabor Kovesdan 409ad30f8e7SGabor Kovesdan } 410ad30f8e7SGabor Kovesdan 411ad30f8e7SGabor Kovesdan static __inline int 412ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 413ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei __unused, 414ad30f8e7SGabor Kovesdan _csid_t * __restrict csid, _index_t * __restrict idx, _wc_t wc) 415ad30f8e7SGabor Kovesdan { 416ad30f8e7SGabor Kovesdan 417ad30f8e7SGabor Kovesdan *csid = 0; 418ad30f8e7SGabor Kovesdan *idx = (_index_t)wc; 419ad30f8e7SGabor Kovesdan 420ad30f8e7SGabor Kovesdan return (0); 421ad30f8e7SGabor Kovesdan } 422ad30f8e7SGabor Kovesdan 423ad30f8e7SGabor Kovesdan static __inline int 424ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 425ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei __unused, 426ad30f8e7SGabor Kovesdan _wc_t * __restrict wc, _csid_t csid, _index_t idx) 427ad30f8e7SGabor Kovesdan { 428ad30f8e7SGabor Kovesdan 429ad30f8e7SGabor Kovesdan if (csid != 0) 430ad30f8e7SGabor Kovesdan return (EILSEQ); 431ad30f8e7SGabor Kovesdan 432ad30f8e7SGabor Kovesdan *wc = (_wc_t)idx; 433ad30f8e7SGabor Kovesdan 434ad30f8e7SGabor Kovesdan return (0); 435ad30f8e7SGabor Kovesdan } 436ad30f8e7SGabor Kovesdan 437ad30f8e7SGabor Kovesdan static __inline int 438ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 439ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei __unused, 440ad30f8e7SGabor Kovesdan _UTF1632State * __restrict psenc, int * __restrict rstate) 441ad30f8e7SGabor Kovesdan { 442ad30f8e7SGabor Kovesdan 443ad30f8e7SGabor Kovesdan *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 444ad30f8e7SGabor Kovesdan _STDENC_SDGEN_INCOMPLETE_CHAR; 445ad30f8e7SGabor Kovesdan return (0); 446ad30f8e7SGabor Kovesdan } 447ad30f8e7SGabor Kovesdan 448ad30f8e7SGabor Kovesdan /* ---------------------------------------------------------------------- 449ad30f8e7SGabor Kovesdan * public interface for stdenc 450ad30f8e7SGabor Kovesdan */ 451ad30f8e7SGabor Kovesdan 452ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DECLS(UTF1632); 453ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DEF_OPS(UTF1632); 454ad30f8e7SGabor Kovesdan 455ad30f8e7SGabor Kovesdan #include "citrus_stdenc_template.h" 456