/* $FreeBSD$ */
/* $NetBSD: citrus_utf1632.c,v 1.9 2008/06/14 16:01:08 tnozaki Exp $ */

/*-
 * Copyright (c)2003 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/endian.h>
#include <sys/types.h>

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_module.h"
#include "citrus_stdenc.h"
#include "citrus_bcs.h"

#include "citrus_utf1632.h"


/* ----------------------------------------------------------------------
 * private stuff used by templates
 */

/*
 * Conversion state carried between calls: the bytes of a character that
 * has only been partially read, and the byte order currently in effect.
 */
typedef struct {
	int	 chlen;			/* number of bytes buffered in ch[] */
	int	 current_endian;	/* one of the _ENDIAN_* values below */
	uint8_t	 ch[4];			/* buffered input bytes */
} _UTF1632State;

/* Byte-order identifiers; 0 doubles as "not decided yet". */
#define _ENDIAN_UNKNOWN	0
#define _ENDIAN_BIG	1
#define _ENDIAN_LITTLE	2
/* "internal" is the host byte order, "swapped" is the opposite one. */
#if BYTE_ORDER == BIG_ENDIAN
#define _ENDIAN_INTERNAL	_ENDIAN_BIG
#define _ENDIAN_SWAPPED	_ENDIAN_LITTLE
#else
#define _ENDIAN_INTERNAL	_ENDIAN_LITTLE
#define _ENDIAN_SWAPPED	_ENDIAN_BIG
#endif
/* Bits of _UTF1632EncodingInfo.mode. */
#define _MODE_UTF32		0x00000001U	/* 32-bit units instead of 16-bit */
#define _MODE_FORCE_ENDIAN	0x00000002U	/* always use preffered_endian */

/* Per-encoding configuration filled in by the module init routine. */
typedef struct {
	int		 preffered_endian;	/* one of the _ENDIAN_* values */
	unsigned int	 cur_max;		/* value reported as MB_CUR_MAX */
	uint32_t	 mode;			/* bitwise OR of _MODE_* flags */
} _UTF1632EncodingInfo;

/* Names and parameters expected by the stdenc template. */
#define _FUNCNAME(m)			_citrus_UTF1632_##m
#define _ENCODING_INFO			_UTF1632EncodingInfo
#define _ENCODING_STATE			_UTF1632State
#define _ENCODING_MB_CUR_MAX(_ei_)	((_ei_)->cur_max)
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0


/*
 * Reset a conversion state to all-zero bytes: nothing buffered and the
 * byte order back to _ENDIAN_UNKNOWN.  The encoding info is not consulted.
 */
static __inline void
/*ARGSUSED*/
_citrus_UTF1632_init_state(_UTF1632EncodingInfo *ei, _UTF1632State *s)
{

	(void)ei;
	memset(s, 0, sizeof(_UTF1632State));
}
97*ad30f8e7SGabor Kovesdan 98*ad30f8e7SGabor Kovesdan static int 99*ad30f8e7SGabor Kovesdan _citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo *ei, wchar_t *pwc, 100*ad30f8e7SGabor Kovesdan char **s, size_t n, _UTF1632State *psenc, size_t *nresult) 101*ad30f8e7SGabor Kovesdan { 102*ad30f8e7SGabor Kovesdan char *s0; 103*ad30f8e7SGabor Kovesdan size_t result; 104*ad30f8e7SGabor Kovesdan wchar_t wc = L'\0'; 105*ad30f8e7SGabor Kovesdan int chlenbak, endian, needlen; 106*ad30f8e7SGabor Kovesdan 107*ad30f8e7SGabor Kovesdan s0 = *s; 108*ad30f8e7SGabor Kovesdan 109*ad30f8e7SGabor Kovesdan if (s0 == NULL) { 110*ad30f8e7SGabor Kovesdan _citrus_UTF1632_init_state(ei, psenc); 111*ad30f8e7SGabor Kovesdan *nresult = 0; /* state independent */ 112*ad30f8e7SGabor Kovesdan return (0); 113*ad30f8e7SGabor Kovesdan } 114*ad30f8e7SGabor Kovesdan 115*ad30f8e7SGabor Kovesdan result = 0; 116*ad30f8e7SGabor Kovesdan chlenbak = psenc->chlen; 117*ad30f8e7SGabor Kovesdan 118*ad30f8e7SGabor Kovesdan refetch: 119*ad30f8e7SGabor Kovesdan needlen = ((ei->mode & _MODE_UTF32) != 0 || chlenbak >= 2) ? 
4 : 2; 120*ad30f8e7SGabor Kovesdan 121*ad30f8e7SGabor Kovesdan while (chlenbak < needlen) { 122*ad30f8e7SGabor Kovesdan if (n == 0) 123*ad30f8e7SGabor Kovesdan goto restart; 124*ad30f8e7SGabor Kovesdan psenc->ch[chlenbak++] = *s0++; 125*ad30f8e7SGabor Kovesdan n--; 126*ad30f8e7SGabor Kovesdan result++; 127*ad30f8e7SGabor Kovesdan } 128*ad30f8e7SGabor Kovesdan 129*ad30f8e7SGabor Kovesdan /* judge endian marker */ 130*ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 131*ad30f8e7SGabor Kovesdan /* UTF16 */ 132*ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0xFE && psenc->ch[1] == 0xFF) { 133*ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 134*ad30f8e7SGabor Kovesdan chlenbak = 0; 135*ad30f8e7SGabor Kovesdan goto refetch; 136*ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE) { 137*ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 138*ad30f8e7SGabor Kovesdan chlenbak = 0; 139*ad30f8e7SGabor Kovesdan goto refetch; 140*ad30f8e7SGabor Kovesdan } 141*ad30f8e7SGabor Kovesdan } else { 142*ad30f8e7SGabor Kovesdan /* UTF32 */ 143*ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0x00 && psenc->ch[1] == 0x00 && 144*ad30f8e7SGabor Kovesdan psenc->ch[2] == 0xFE && psenc->ch[3] == 0xFF) { 145*ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 146*ad30f8e7SGabor Kovesdan chlenbak = 0; 147*ad30f8e7SGabor Kovesdan goto refetch; 148*ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE && 149*ad30f8e7SGabor Kovesdan psenc->ch[2] == 0x00 && psenc->ch[3] == 0x00) { 150*ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 151*ad30f8e7SGabor Kovesdan chlenbak = 0; 152*ad30f8e7SGabor Kovesdan goto refetch; 153*ad30f8e7SGabor Kovesdan } 154*ad30f8e7SGabor Kovesdan } 155*ad30f8e7SGabor Kovesdan endian = ((ei->mode & _MODE_FORCE_ENDIAN) != 0 || 156*ad30f8e7SGabor Kovesdan psenc->current_endian == _ENDIAN_UNKNOWN) ? 
ei->preffered_endian : 157*ad30f8e7SGabor Kovesdan psenc->current_endian; 158*ad30f8e7SGabor Kovesdan 159*ad30f8e7SGabor Kovesdan /* get wc */ 160*ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 161*ad30f8e7SGabor Kovesdan /* UTF16 */ 162*ad30f8e7SGabor Kovesdan if (needlen == 2) { 163*ad30f8e7SGabor Kovesdan switch (endian) { 164*ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 165*ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 166*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8)); 167*ad30f8e7SGabor Kovesdan break; 168*ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 169*ad30f8e7SGabor Kovesdan wc = (psenc->ch[1] | 170*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 8)); 171*ad30f8e7SGabor Kovesdan break; 172*ad30f8e7SGabor Kovesdan default: 173*ad30f8e7SGabor Kovesdan goto ilseq; 174*ad30f8e7SGabor Kovesdan } 175*ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDBFF) { 176*ad30f8e7SGabor Kovesdan /* surrogate high */ 177*ad30f8e7SGabor Kovesdan needlen = 4; 178*ad30f8e7SGabor Kovesdan goto refetch; 179*ad30f8e7SGabor Kovesdan } 180*ad30f8e7SGabor Kovesdan } else { 181*ad30f8e7SGabor Kovesdan /* surrogate low */ 182*ad30f8e7SGabor Kovesdan wc -= 0xD800; /* wc : surrogate high (see above) */ 183*ad30f8e7SGabor Kovesdan wc <<= 10; 184*ad30f8e7SGabor Kovesdan switch (endian) { 185*ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 186*ad30f8e7SGabor Kovesdan if (psenc->ch[3] < 0xDC || psenc->ch[3] > 0xDF) 187*ad30f8e7SGabor Kovesdan goto ilseq; 188*ad30f8e7SGabor Kovesdan wc |= psenc->ch[2]; 189*ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[3] & 3) << 8; 190*ad30f8e7SGabor Kovesdan break; 191*ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 192*ad30f8e7SGabor Kovesdan if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF) 193*ad30f8e7SGabor Kovesdan goto ilseq; 194*ad30f8e7SGabor Kovesdan wc |= psenc->ch[3]; 195*ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[2] & 3) << 8; 196*ad30f8e7SGabor Kovesdan break; 197*ad30f8e7SGabor Kovesdan default: 198*ad30f8e7SGabor Kovesdan 
goto ilseq; 199*ad30f8e7SGabor Kovesdan } 200*ad30f8e7SGabor Kovesdan wc += 0x10000; 201*ad30f8e7SGabor Kovesdan } 202*ad30f8e7SGabor Kovesdan } else { 203*ad30f8e7SGabor Kovesdan /* UTF32 */ 204*ad30f8e7SGabor Kovesdan switch (endian) { 205*ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 206*ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 207*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8) | 208*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 16) | 209*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[3] << 24)); 210*ad30f8e7SGabor Kovesdan break; 211*ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 212*ad30f8e7SGabor Kovesdan wc = (psenc->ch[3] | 213*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 8) | 214*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 16) | 215*ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 24)); 216*ad30f8e7SGabor Kovesdan break; 217*ad30f8e7SGabor Kovesdan default: 218*ad30f8e7SGabor Kovesdan goto ilseq; 219*ad30f8e7SGabor Kovesdan } 220*ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 221*ad30f8e7SGabor Kovesdan goto ilseq; 222*ad30f8e7SGabor Kovesdan } 223*ad30f8e7SGabor Kovesdan 224*ad30f8e7SGabor Kovesdan 225*ad30f8e7SGabor Kovesdan *pwc = wc; 226*ad30f8e7SGabor Kovesdan psenc->chlen = 0; 227*ad30f8e7SGabor Kovesdan *nresult = result; 228*ad30f8e7SGabor Kovesdan *s = s0; 229*ad30f8e7SGabor Kovesdan 230*ad30f8e7SGabor Kovesdan return (0); 231*ad30f8e7SGabor Kovesdan 232*ad30f8e7SGabor Kovesdan ilseq: 233*ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 234*ad30f8e7SGabor Kovesdan psenc->chlen = 0; 235*ad30f8e7SGabor Kovesdan return (EILSEQ); 236*ad30f8e7SGabor Kovesdan 237*ad30f8e7SGabor Kovesdan restart: 238*ad30f8e7SGabor Kovesdan *nresult = (size_t)-2; 239*ad30f8e7SGabor Kovesdan psenc->chlen = chlenbak; 240*ad30f8e7SGabor Kovesdan *s = s0; 241*ad30f8e7SGabor Kovesdan return (0); 242*ad30f8e7SGabor Kovesdan } 243*ad30f8e7SGabor Kovesdan 244*ad30f8e7SGabor Kovesdan static int 245*ad30f8e7SGabor Kovesdan 
_citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo *ei, char *s, size_t n, 246*ad30f8e7SGabor Kovesdan wchar_t wc, _UTF1632State *psenc, size_t *nresult) 247*ad30f8e7SGabor Kovesdan { 248*ad30f8e7SGabor Kovesdan wchar_t wc2; 249*ad30f8e7SGabor Kovesdan static const char _bom[4] = { 250*ad30f8e7SGabor Kovesdan 0x00, 0x00, 0xFE, 0xFF, 251*ad30f8e7SGabor Kovesdan }; 252*ad30f8e7SGabor Kovesdan const char *bom = &_bom[0]; 253*ad30f8e7SGabor Kovesdan size_t cnt; 254*ad30f8e7SGabor Kovesdan 255*ad30f8e7SGabor Kovesdan cnt = (size_t)0; 256*ad30f8e7SGabor Kovesdan if (psenc->current_endian == _ENDIAN_UNKNOWN) { 257*ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) { 258*ad30f8e7SGabor Kovesdan if (ei->mode & _MODE_UTF32) 259*ad30f8e7SGabor Kovesdan cnt = 4; 260*ad30f8e7SGabor Kovesdan else { 261*ad30f8e7SGabor Kovesdan cnt = 2; 262*ad30f8e7SGabor Kovesdan bom += 2; 263*ad30f8e7SGabor Kovesdan } 264*ad30f8e7SGabor Kovesdan if (n < cnt) 265*ad30f8e7SGabor Kovesdan goto e2big; 266*ad30f8e7SGabor Kovesdan memcpy(s, bom, cnt); 267*ad30f8e7SGabor Kovesdan s += cnt, n -= cnt; 268*ad30f8e7SGabor Kovesdan } 269*ad30f8e7SGabor Kovesdan psenc->current_endian = ei->preffered_endian; 270*ad30f8e7SGabor Kovesdan } 271*ad30f8e7SGabor Kovesdan 272*ad30f8e7SGabor Kovesdan wc2 = 0; 273*ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32)==0) { 274*ad30f8e7SGabor Kovesdan /* UTF16 */ 275*ad30f8e7SGabor Kovesdan if (wc > 0xFFFF) { 276*ad30f8e7SGabor Kovesdan /* surrogate */ 277*ad30f8e7SGabor Kovesdan if (wc > 0x10FFFF) 278*ad30f8e7SGabor Kovesdan goto ilseq; 279*ad30f8e7SGabor Kovesdan if (n < 4) 280*ad30f8e7SGabor Kovesdan goto e2big; 281*ad30f8e7SGabor Kovesdan cnt += 4; 282*ad30f8e7SGabor Kovesdan wc -= 0x10000; 283*ad30f8e7SGabor Kovesdan wc2 = (wc & 0x3FF) | 0xDC00; 284*ad30f8e7SGabor Kovesdan wc = (wc>>10) | 0xD800; 285*ad30f8e7SGabor Kovesdan } else { 286*ad30f8e7SGabor Kovesdan if (n < 2) 287*ad30f8e7SGabor Kovesdan goto e2big; 288*ad30f8e7SGabor Kovesdan cnt += 2; 
289*ad30f8e7SGabor Kovesdan } 290*ad30f8e7SGabor Kovesdan 291*ad30f8e7SGabor Kovesdan surrogate: 292*ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 293*ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 294*ad30f8e7SGabor Kovesdan s[1] = wc; 295*ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 296*ad30f8e7SGabor Kovesdan break; 297*ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 298*ad30f8e7SGabor Kovesdan s[0] = wc; 299*ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 300*ad30f8e7SGabor Kovesdan break; 301*ad30f8e7SGabor Kovesdan } 302*ad30f8e7SGabor Kovesdan if (wc2 != 0) { 303*ad30f8e7SGabor Kovesdan wc = wc2; 304*ad30f8e7SGabor Kovesdan wc2 = 0; 305*ad30f8e7SGabor Kovesdan s += 2; 306*ad30f8e7SGabor Kovesdan goto surrogate; 307*ad30f8e7SGabor Kovesdan } 308*ad30f8e7SGabor Kovesdan } else { 309*ad30f8e7SGabor Kovesdan /* UTF32 */ 310*ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 311*ad30f8e7SGabor Kovesdan goto ilseq; 312*ad30f8e7SGabor Kovesdan if (n < 4) 313*ad30f8e7SGabor Kovesdan goto e2big; 314*ad30f8e7SGabor Kovesdan cnt += 4; 315*ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 316*ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 317*ad30f8e7SGabor Kovesdan s[3] = wc; 318*ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 319*ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 320*ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 321*ad30f8e7SGabor Kovesdan break; 322*ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 323*ad30f8e7SGabor Kovesdan s[0] = wc; 324*ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 325*ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 326*ad30f8e7SGabor Kovesdan s[3] = (wc >>= 8); 327*ad30f8e7SGabor Kovesdan break; 328*ad30f8e7SGabor Kovesdan } 329*ad30f8e7SGabor Kovesdan } 330*ad30f8e7SGabor Kovesdan *nresult = cnt; 331*ad30f8e7SGabor Kovesdan 332*ad30f8e7SGabor Kovesdan return (0); 333*ad30f8e7SGabor Kovesdan 334*ad30f8e7SGabor Kovesdan ilseq: 335*ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 336*ad30f8e7SGabor Kovesdan return (EILSEQ); 337*ad30f8e7SGabor Kovesdan 
e2big: 338*ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 339*ad30f8e7SGabor Kovesdan return (E2BIG); 340*ad30f8e7SGabor Kovesdan } 341*ad30f8e7SGabor Kovesdan 342*ad30f8e7SGabor Kovesdan static void 343*ad30f8e7SGabor Kovesdan parse_variable(_UTF1632EncodingInfo * __restrict ei, 344*ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 345*ad30f8e7SGabor Kovesdan { 346*ad30f8e7SGabor Kovesdan const char *p; 347*ad30f8e7SGabor Kovesdan 348*ad30f8e7SGabor Kovesdan p = var; 349*ad30f8e7SGabor Kovesdan while (lenvar > 0) { 350*ad30f8e7SGabor Kovesdan switch (*p) { 351*ad30f8e7SGabor Kovesdan case 'B': 352*ad30f8e7SGabor Kovesdan case 'b': 353*ad30f8e7SGabor Kovesdan MATCH(big, ei->preffered_endian = _ENDIAN_BIG); 354*ad30f8e7SGabor Kovesdan break; 355*ad30f8e7SGabor Kovesdan case 'L': 356*ad30f8e7SGabor Kovesdan case 'l': 357*ad30f8e7SGabor Kovesdan MATCH(little, ei->preffered_endian = _ENDIAN_LITTLE); 358*ad30f8e7SGabor Kovesdan break; 359*ad30f8e7SGabor Kovesdan case 'i': 360*ad30f8e7SGabor Kovesdan case 'I': 361*ad30f8e7SGabor Kovesdan MATCH(internal, ei->preffered_endian = _ENDIAN_INTERNAL); 362*ad30f8e7SGabor Kovesdan break; 363*ad30f8e7SGabor Kovesdan case 's': 364*ad30f8e7SGabor Kovesdan case 'S': 365*ad30f8e7SGabor Kovesdan MATCH(swapped, ei->preffered_endian = _ENDIAN_SWAPPED); 366*ad30f8e7SGabor Kovesdan break; 367*ad30f8e7SGabor Kovesdan case 'F': 368*ad30f8e7SGabor Kovesdan case 'f': 369*ad30f8e7SGabor Kovesdan MATCH(force, ei->mode |= _MODE_FORCE_ENDIAN); 370*ad30f8e7SGabor Kovesdan break; 371*ad30f8e7SGabor Kovesdan case 'U': 372*ad30f8e7SGabor Kovesdan case 'u': 373*ad30f8e7SGabor Kovesdan MATCH(utf32, ei->mode |= _MODE_UTF32); 374*ad30f8e7SGabor Kovesdan break; 375*ad30f8e7SGabor Kovesdan } 376*ad30f8e7SGabor Kovesdan p++; 377*ad30f8e7SGabor Kovesdan lenvar--; 378*ad30f8e7SGabor Kovesdan } 379*ad30f8e7SGabor Kovesdan } 380*ad30f8e7SGabor Kovesdan 381*ad30f8e7SGabor Kovesdan static int 382*ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 
383*ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei, 384*ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 385*ad30f8e7SGabor Kovesdan { 386*ad30f8e7SGabor Kovesdan 387*ad30f8e7SGabor Kovesdan memset((void *)ei, 0, sizeof(*ei)); 388*ad30f8e7SGabor Kovesdan 389*ad30f8e7SGabor Kovesdan parse_variable(ei, var, lenvar); 390*ad30f8e7SGabor Kovesdan 391*ad30f8e7SGabor Kovesdan ei->cur_max = ((ei->mode&_MODE_UTF32) == 0) ? 6 : 8; 392*ad30f8e7SGabor Kovesdan /* 6: endian + surrogate */ 393*ad30f8e7SGabor Kovesdan /* 8: endian + normal */ 394*ad30f8e7SGabor Kovesdan 395*ad30f8e7SGabor Kovesdan if (ei->preffered_endian == _ENDIAN_UNKNOWN) { 396*ad30f8e7SGabor Kovesdan ei->preffered_endian = _ENDIAN_BIG; 397*ad30f8e7SGabor Kovesdan } 398*ad30f8e7SGabor Kovesdan 399*ad30f8e7SGabor Kovesdan return (0); 400*ad30f8e7SGabor Kovesdan } 401*ad30f8e7SGabor Kovesdan 402*ad30f8e7SGabor Kovesdan static void 403*ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 404*ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo *ei __unused) 405*ad30f8e7SGabor Kovesdan { 406*ad30f8e7SGabor Kovesdan 407*ad30f8e7SGabor Kovesdan } 408*ad30f8e7SGabor Kovesdan 409*ad30f8e7SGabor Kovesdan static __inline int 410*ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 411*ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei __unused, 412*ad30f8e7SGabor Kovesdan _csid_t * __restrict csid, _index_t * __restrict idx, _wc_t wc) 413*ad30f8e7SGabor Kovesdan { 414*ad30f8e7SGabor Kovesdan 415*ad30f8e7SGabor Kovesdan *csid = 0; 416*ad30f8e7SGabor Kovesdan *idx = (_index_t)wc; 417*ad30f8e7SGabor Kovesdan 418*ad30f8e7SGabor Kovesdan return (0); 419*ad30f8e7SGabor Kovesdan } 420*ad30f8e7SGabor Kovesdan 421*ad30f8e7SGabor Kovesdan static __inline int 422*ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 423*ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei __unused, 424*ad30f8e7SGabor 
Kovesdan _wc_t * __restrict wc, _csid_t csid, _index_t idx) 425*ad30f8e7SGabor Kovesdan { 426*ad30f8e7SGabor Kovesdan 427*ad30f8e7SGabor Kovesdan if (csid != 0) 428*ad30f8e7SGabor Kovesdan return (EILSEQ); 429*ad30f8e7SGabor Kovesdan 430*ad30f8e7SGabor Kovesdan *wc = (_wc_t)idx; 431*ad30f8e7SGabor Kovesdan 432*ad30f8e7SGabor Kovesdan return (0); 433*ad30f8e7SGabor Kovesdan } 434*ad30f8e7SGabor Kovesdan 435*ad30f8e7SGabor Kovesdan static __inline int 436*ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 437*ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei __unused, 438*ad30f8e7SGabor Kovesdan _UTF1632State * __restrict psenc, int * __restrict rstate) 439*ad30f8e7SGabor Kovesdan { 440*ad30f8e7SGabor Kovesdan 441*ad30f8e7SGabor Kovesdan *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 442*ad30f8e7SGabor Kovesdan _STDENC_SDGEN_INCOMPLETE_CHAR; 443*ad30f8e7SGabor Kovesdan return (0); 444*ad30f8e7SGabor Kovesdan } 445*ad30f8e7SGabor Kovesdan 446*ad30f8e7SGabor Kovesdan /* ---------------------------------------------------------------------- 447*ad30f8e7SGabor Kovesdan * public interface for stdenc 448*ad30f8e7SGabor Kovesdan */ 449*ad30f8e7SGabor Kovesdan 450*ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DECLS(UTF1632); 451*ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DEF_OPS(UTF1632); 452*ad30f8e7SGabor Kovesdan 453*ad30f8e7SGabor Kovesdan #include "citrus_stdenc_template.h" 454