1*ae771770SStanislav Sedov /* 2*ae771770SStanislav Sedov * Copyright (c) 2004, 2006, 2007, 2008 Kungliga Tekniska Högskolan 3*ae771770SStanislav Sedov * (Royal Institute of Technology, Stockholm, Sweden). 4*ae771770SStanislav Sedov * All rights reserved. 5*ae771770SStanislav Sedov * 6*ae771770SStanislav Sedov * Redistribution and use in source and binary forms, with or without 7*ae771770SStanislav Sedov * modification, are permitted provided that the following conditions 8*ae771770SStanislav Sedov * are met: 9*ae771770SStanislav Sedov * 10*ae771770SStanislav Sedov * 1. Redistributions of source code must retain the above copyright 11*ae771770SStanislav Sedov * notice, this list of conditions and the following disclaimer. 12*ae771770SStanislav Sedov * 13*ae771770SStanislav Sedov * 2. Redistributions in binary form must reproduce the above copyright 14*ae771770SStanislav Sedov * notice, this list of conditions and the following disclaimer in the 15*ae771770SStanislav Sedov * documentation and/or other materials provided with the distribution. 16*ae771770SStanislav Sedov * 17*ae771770SStanislav Sedov * 3. Neither the name of the Institute nor the names of its contributors 18*ae771770SStanislav Sedov * may be used to endorse or promote products derived from this software 19*ae771770SStanislav Sedov * without specific prior written permission. 20*ae771770SStanislav Sedov * 21*ae771770SStanislav Sedov * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 22*ae771770SStanislav Sedov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23*ae771770SStanislav Sedov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24*ae771770SStanislav Sedov * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 25*ae771770SStanislav Sedov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26*ae771770SStanislav Sedov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27*ae771770SStanislav Sedov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28*ae771770SStanislav Sedov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29*ae771770SStanislav Sedov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30*ae771770SStanislav Sedov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31*ae771770SStanislav Sedov * SUCH DAMAGE. 32*ae771770SStanislav Sedov */ 33*ae771770SStanislav Sedov 34*ae771770SStanislav Sedov #include <config.h> 35*ae771770SStanislav Sedov #include "windlocl.h" 36*ae771770SStanislav Sedov 37*ae771770SStanislav Sedov static int 38*ae771770SStanislav Sedov utf8toutf32(const unsigned char **pp, uint32_t *out) 39*ae771770SStanislav Sedov { 40*ae771770SStanislav Sedov const unsigned char *p = *pp; 41*ae771770SStanislav Sedov unsigned c = *p; 42*ae771770SStanislav Sedov 43*ae771770SStanislav Sedov if (c & 0x80) { 44*ae771770SStanislav Sedov if ((c & 0xE0) == 0xC0) { 45*ae771770SStanislav Sedov const unsigned c2 = *++p; 46*ae771770SStanislav Sedov if ((c2 & 0xC0) == 0x80) { 47*ae771770SStanislav Sedov *out = ((c & 0x1F) << 6) 48*ae771770SStanislav Sedov | (c2 & 0x3F); 49*ae771770SStanislav Sedov } else { 50*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 51*ae771770SStanislav Sedov } 52*ae771770SStanislav Sedov } else if ((c & 0xF0) == 0xE0) { 53*ae771770SStanislav Sedov const unsigned c2 = *++p; 54*ae771770SStanislav Sedov if ((c2 & 0xC0) == 0x80) { 55*ae771770SStanislav Sedov const unsigned c3 = *++p; 56*ae771770SStanislav Sedov if ((c3 & 0xC0) == 0x80) { 57*ae771770SStanislav Sedov *out = ((c & 0x0F) << 12) 58*ae771770SStanislav Sedov | ((c2 & 0x3F) << 6) 59*ae771770SStanislav Sedov | (c3 & 0x3F); 60*ae771770SStanislav Sedov } else { 61*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 62*ae771770SStanislav Sedov } 63*ae771770SStanislav Sedov } else { 64*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 65*ae771770SStanislav Sedov } 66*ae771770SStanislav Sedov } else if ((c & 0xF8) == 0xF0) { 67*ae771770SStanislav Sedov const unsigned c2 = *++p; 68*ae771770SStanislav Sedov if ((c2 & 0xC0) == 0x80) { 69*ae771770SStanislav Sedov const unsigned c3 = *++p; 70*ae771770SStanislav Sedov if ((c3 & 0xC0) == 0x80) { 71*ae771770SStanislav Sedov const unsigned c4 = *++p; 72*ae771770SStanislav Sedov if ((c4 & 0xC0) == 0x80) { 73*ae771770SStanislav Sedov *out = ((c & 0x07) << 18) 74*ae771770SStanislav Sedov | ((c2 & 0x3F) << 12) 75*ae771770SStanislav Sedov | ((c3 & 0x3F) << 6) 76*ae771770SStanislav Sedov | (c4 & 0x3F); 77*ae771770SStanislav Sedov } else { 78*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 79*ae771770SStanislav Sedov } 80*ae771770SStanislav Sedov } else { 81*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 82*ae771770SStanislav Sedov } 83*ae771770SStanislav Sedov } else { 84*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 85*ae771770SStanislav Sedov } 86*ae771770SStanislav Sedov } else { 87*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF8; 88*ae771770SStanislav Sedov } 89*ae771770SStanislav Sedov } else { 90*ae771770SStanislav Sedov *out = c; 91*ae771770SStanislav Sedov } 92*ae771770SStanislav Sedov 93*ae771770SStanislav Sedov *pp = p; 94*ae771770SStanislav Sedov 95*ae771770SStanislav Sedov return 0; 96*ae771770SStanislav Sedov } 97*ae771770SStanislav Sedov 98*ae771770SStanislav Sedov /** 99*ae771770SStanislav Sedov * Convert an UTF-8 string to an UCS4 string. 100*ae771770SStanislav Sedov * 101*ae771770SStanislav Sedov * @param in an UTF-8 string to convert. 102*ae771770SStanislav Sedov * @param out the resulting UCS4 strint, must be at least 103*ae771770SStanislav Sedov * wind_utf8ucs4_length() long. If out is NULL, the function will 104*ae771770SStanislav Sedov * calculate the needed space for the out variable (just like 105*ae771770SStanislav Sedov * wind_utf8ucs4_length()). 106*ae771770SStanislav Sedov * @param out_len before processing out_len should be the length of 107*ae771770SStanislav Sedov * the out variable, after processing it will be the length of the out 108*ae771770SStanislav Sedov * string. 109*ae771770SStanislav Sedov * 110*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 111*ae771770SStanislav Sedov * @ingroup wind 112*ae771770SStanislav Sedov */ 113*ae771770SStanislav Sedov 114*ae771770SStanislav Sedov int 115*ae771770SStanislav Sedov wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len) 116*ae771770SStanislav Sedov { 117*ae771770SStanislav Sedov const unsigned char *p; 118*ae771770SStanislav Sedov size_t o = 0; 119*ae771770SStanislav Sedov int ret; 120*ae771770SStanislav Sedov 121*ae771770SStanislav Sedov for (p = (const unsigned char *)in; *p != '\0'; ++p) { 122*ae771770SStanislav Sedov uint32_t u; 123*ae771770SStanislav Sedov 124*ae771770SStanislav Sedov ret = utf8toutf32(&p, &u); 125*ae771770SStanislav Sedov if (ret) 126*ae771770SStanislav Sedov return ret; 127*ae771770SStanislav Sedov 128*ae771770SStanislav Sedov if (out) { 129*ae771770SStanislav Sedov if (o >= *out_len) 130*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 131*ae771770SStanislav Sedov out[o] = u; 132*ae771770SStanislav Sedov } 133*ae771770SStanislav Sedov o++; 134*ae771770SStanislav Sedov } 135*ae771770SStanislav Sedov *out_len = o; 136*ae771770SStanislav Sedov return 0; 137*ae771770SStanislav Sedov } 138*ae771770SStanislav Sedov 139*ae771770SStanislav Sedov /** 140*ae771770SStanislav Sedov * Calculate the length of from converting a UTF-8 string to a UCS4 141*ae771770SStanislav Sedov * string. 142*ae771770SStanislav Sedov * 143*ae771770SStanislav Sedov * @param in an UTF-8 string to convert. 144*ae771770SStanislav Sedov * @param out_len the length of the resulting UCS4 string. 145*ae771770SStanislav Sedov * 146*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 147*ae771770SStanislav Sedov * @ingroup wind 148*ae771770SStanislav Sedov */ 149*ae771770SStanislav Sedov 150*ae771770SStanislav Sedov int 151*ae771770SStanislav Sedov wind_utf8ucs4_length(const char *in, size_t *out_len) 152*ae771770SStanislav Sedov { 153*ae771770SStanislav Sedov return wind_utf8ucs4(in, NULL, out_len); 154*ae771770SStanislav Sedov } 155*ae771770SStanislav Sedov 156*ae771770SStanislav Sedov static const char first_char[4] = 157*ae771770SStanislav Sedov { 0x00, 0xC0, 0xE0, 0xF0 }; 158*ae771770SStanislav Sedov 159*ae771770SStanislav Sedov /** 160*ae771770SStanislav Sedov * Convert an UCS4 string to a UTF-8 string. 161*ae771770SStanislav Sedov * 162*ae771770SStanislav Sedov * @param in an UCS4 string to convert. 163*ae771770SStanislav Sedov * @param in_len the length input array. 164*ae771770SStanislav Sedov 165*ae771770SStanislav Sedov * @param out the resulting UTF-8 strint, must be at least 166*ae771770SStanislav Sedov * wind_ucs4utf8_length() + 1 long (the extra char for the NUL). If 167*ae771770SStanislav Sedov * out is NULL, the function will calculate the needed space for the 168*ae771770SStanislav Sedov * out variable (just like wind_ucs4utf8_length()). 169*ae771770SStanislav Sedov 170*ae771770SStanislav Sedov * @param out_len before processing out_len should be the length of 171*ae771770SStanislav Sedov * the out variable, after processing it will be the length of the out 172*ae771770SStanislav Sedov * string. 173*ae771770SStanislav Sedov * 174*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 175*ae771770SStanislav Sedov * @ingroup wind 176*ae771770SStanislav Sedov */ 177*ae771770SStanislav Sedov 178*ae771770SStanislav Sedov int 179*ae771770SStanislav Sedov wind_ucs4utf8(const uint32_t *in, size_t in_len, char *out, size_t *out_len) 180*ae771770SStanislav Sedov { 181*ae771770SStanislav Sedov uint32_t ch; 182*ae771770SStanislav Sedov size_t i, len, o; 183*ae771770SStanislav Sedov 184*ae771770SStanislav Sedov for (o = 0, i = 0; i < in_len; i++) { 185*ae771770SStanislav Sedov ch = in[i]; 186*ae771770SStanislav Sedov 187*ae771770SStanislav Sedov if (ch < 0x80) { 188*ae771770SStanislav Sedov len = 1; 189*ae771770SStanislav Sedov } else if (ch < 0x800) { 190*ae771770SStanislav Sedov len = 2; 191*ae771770SStanislav Sedov } else if (ch < 0x10000) { 192*ae771770SStanislav Sedov len = 3; 193*ae771770SStanislav Sedov } else if (ch <= 0x10FFFF) { 194*ae771770SStanislav Sedov len = 4; 195*ae771770SStanislav Sedov } else 196*ae771770SStanislav Sedov return WIND_ERR_INVALID_UTF32; 197*ae771770SStanislav Sedov 198*ae771770SStanislav Sedov o += len; 199*ae771770SStanislav Sedov 200*ae771770SStanislav Sedov if (out) { 201*ae771770SStanislav Sedov if (o >= *out_len) 202*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 203*ae771770SStanislav Sedov 204*ae771770SStanislav Sedov switch(len) { 205*ae771770SStanislav Sedov case 4: 206*ae771770SStanislav Sedov out[3] = (ch | 0x80) & 0xbf; 207*ae771770SStanislav Sedov ch = ch << 6; 208*ae771770SStanislav Sedov case 3: 209*ae771770SStanislav Sedov out[2] = (ch | 0x80) & 0xbf; 210*ae771770SStanislav Sedov ch = ch << 6; 211*ae771770SStanislav Sedov case 2: 212*ae771770SStanislav Sedov out[1] = (ch | 0x80) & 0xbf; 213*ae771770SStanislav Sedov ch = ch << 6; 214*ae771770SStanislav Sedov case 1: 215*ae771770SStanislav Sedov out[0] = ch | first_char[len - 1]; 216*ae771770SStanislav Sedov } 217*ae771770SStanislav Sedov } 218*ae771770SStanislav Sedov out += len; 219*ae771770SStanislav Sedov } 220*ae771770SStanislav Sedov if (out) { 221*ae771770SStanislav Sedov if (o + 1 >= *out_len) 222*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 223*ae771770SStanislav Sedov *out = '\0'; 224*ae771770SStanislav Sedov } 225*ae771770SStanislav Sedov *out_len = o; 226*ae771770SStanislav Sedov return 0; 227*ae771770SStanislav Sedov } 228*ae771770SStanislav Sedov 229*ae771770SStanislav Sedov /** 230*ae771770SStanislav Sedov * Calculate the length of from converting a UCS4 string to an UTF-8 string. 231*ae771770SStanislav Sedov * 232*ae771770SStanislav Sedov * @param in an UCS4 string to convert. 233*ae771770SStanislav Sedov * @param in_len the length of UCS4 string to convert. 234*ae771770SStanislav Sedov * @param out_len the length of the resulting UTF-8 string. 235*ae771770SStanislav Sedov * 236*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 237*ae771770SStanislav Sedov * @ingroup wind 238*ae771770SStanislav Sedov */ 239*ae771770SStanislav Sedov 240*ae771770SStanislav Sedov int 241*ae771770SStanislav Sedov wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len) 242*ae771770SStanislav Sedov { 243*ae771770SStanislav Sedov return wind_ucs4utf8(in, in_len, NULL, out_len); 244*ae771770SStanislav Sedov } 245*ae771770SStanislav Sedov 246*ae771770SStanislav Sedov /** 247*ae771770SStanislav Sedov * Read in an UCS2 from a buffer. 248*ae771770SStanislav Sedov * 249*ae771770SStanislav Sedov * @param ptr The input buffer to read from. 250*ae771770SStanislav Sedov * @param len the length of the input buffer. 251*ae771770SStanislav Sedov * @param flags Flags to control the behavior of the function. 252*ae771770SStanislav Sedov * @param out the output UCS2, the array must be at least out/2 long. 253*ae771770SStanislav Sedov * @param out_len the output length 254*ae771770SStanislav Sedov * 255*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise. 256*ae771770SStanislav Sedov * @ingroup wind 257*ae771770SStanislav Sedov */ 258*ae771770SStanislav Sedov 259*ae771770SStanislav Sedov int 260*ae771770SStanislav Sedov wind_ucs2read(const void *ptr, size_t len, unsigned int *flags, 261*ae771770SStanislav Sedov uint16_t *out, size_t *out_len) 262*ae771770SStanislav Sedov { 263*ae771770SStanislav Sedov const unsigned char *p = ptr; 264*ae771770SStanislav Sedov int little = ((*flags) & WIND_RW_LE); 265*ae771770SStanislav Sedov size_t olen = *out_len; 266*ae771770SStanislav Sedov 267*ae771770SStanislav Sedov /** if len is zero, flags are unchanged */ 268*ae771770SStanislav Sedov if (len == 0) { 269*ae771770SStanislav Sedov *out_len = 0; 270*ae771770SStanislav Sedov return 0; 271*ae771770SStanislav Sedov } 272*ae771770SStanislav Sedov 273*ae771770SStanislav Sedov /** if len is odd, WIND_ERR_LENGTH_NOT_MOD2 is returned */ 274*ae771770SStanislav Sedov if (len & 1) 275*ae771770SStanislav Sedov return WIND_ERR_LENGTH_NOT_MOD2; 276*ae771770SStanislav Sedov 277*ae771770SStanislav Sedov /** 278*ae771770SStanislav Sedov * If the flags WIND_RW_BOM is set, check for BOM. If not BOM is 279*ae771770SStanislav Sedov * found, check is LE/BE flag is already and use that otherwise 280*ae771770SStanislav Sedov * fail with WIND_ERR_NO_BOM. When done, clear WIND_RW_BOM and 281*ae771770SStanislav Sedov * the LE/BE flag and set the resulting LE/BE flag. 282*ae771770SStanislav Sedov */ 283*ae771770SStanislav Sedov if ((*flags) & WIND_RW_BOM) { 284*ae771770SStanislav Sedov uint16_t bom = (p[0] << 8) + p[1]; 285*ae771770SStanislav Sedov if (bom == 0xfffe || bom == 0xfeff) { 286*ae771770SStanislav Sedov little = (bom == 0xfffe); 287*ae771770SStanislav Sedov p += 2; 288*ae771770SStanislav Sedov len -= 2; 289*ae771770SStanislav Sedov } else if (((*flags) & (WIND_RW_LE|WIND_RW_BE)) != 0) { 290*ae771770SStanislav Sedov /* little already set */ 291*ae771770SStanislav Sedov } else 292*ae771770SStanislav Sedov return WIND_ERR_NO_BOM; 293*ae771770SStanislav Sedov *flags = ((*flags) & ~(WIND_RW_BOM|WIND_RW_LE|WIND_RW_BE)); 294*ae771770SStanislav Sedov *flags |= little ? WIND_RW_LE : WIND_RW_BE; 295*ae771770SStanislav Sedov } 296*ae771770SStanislav Sedov 297*ae771770SStanislav Sedov while (len) { 298*ae771770SStanislav Sedov if (olen < 1) 299*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 300*ae771770SStanislav Sedov if (little) 301*ae771770SStanislav Sedov *out = (p[1] << 8) + p[0]; 302*ae771770SStanislav Sedov else 303*ae771770SStanislav Sedov *out = (p[0] << 8) + p[1]; 304*ae771770SStanislav Sedov out++; p += 2; len -= 2; olen--; 305*ae771770SStanislav Sedov } 306*ae771770SStanislav Sedov *out_len -= olen; 307*ae771770SStanislav Sedov return 0; 308*ae771770SStanislav Sedov } 309*ae771770SStanislav Sedov 310*ae771770SStanislav Sedov /** 311*ae771770SStanislav Sedov * Write an UCS2 string to a buffer. 312*ae771770SStanislav Sedov * 313*ae771770SStanislav Sedov * @param in The input UCS2 string. 314*ae771770SStanislav Sedov * @param in_len the length of the input buffer. 315*ae771770SStanislav Sedov * @param flags Flags to control the behavior of the function. 316*ae771770SStanislav Sedov * @param ptr The input buffer to write to, the array must be at least 317*ae771770SStanislav Sedov * (in + 1) * 2 bytes long. 318*ae771770SStanislav Sedov * @param out_len the output length 319*ae771770SStanislav Sedov * 320*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise. 321*ae771770SStanislav Sedov * @ingroup wind 322*ae771770SStanislav Sedov */ 323*ae771770SStanislav Sedov 324*ae771770SStanislav Sedov int 325*ae771770SStanislav Sedov wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags, 326*ae771770SStanislav Sedov void *ptr, size_t *out_len) 327*ae771770SStanislav Sedov { 328*ae771770SStanislav Sedov unsigned char *p = ptr; 329*ae771770SStanislav Sedov size_t len = *out_len; 330*ae771770SStanislav Sedov 331*ae771770SStanislav Sedov /** If in buffer is not of length be mod 2, WIND_ERR_LENGTH_NOT_MOD2 is returned*/ 332*ae771770SStanislav Sedov if (len & 1) 333*ae771770SStanislav Sedov return WIND_ERR_LENGTH_NOT_MOD2; 334*ae771770SStanislav Sedov 335*ae771770SStanislav Sedov /** On zero input length, flags are preserved */ 336*ae771770SStanislav Sedov if (in_len == 0) { 337*ae771770SStanislav Sedov *out_len = 0; 338*ae771770SStanislav Sedov return 0; 339*ae771770SStanislav Sedov } 340*ae771770SStanislav Sedov /** If flags have WIND_RW_BOM set, the byte order mark is written 341*ae771770SStanislav Sedov * first to the output data */ 342*ae771770SStanislav Sedov if ((*flags) & WIND_RW_BOM) { 343*ae771770SStanislav Sedov uint16_t bom = 0xfffe; 344*ae771770SStanislav Sedov 345*ae771770SStanislav Sedov if (len < 2) 346*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 347*ae771770SStanislav Sedov 348*ae771770SStanislav Sedov if ((*flags) & WIND_RW_LE) { 349*ae771770SStanislav Sedov p[0] = (bom >> 8) & 0xff; 350*ae771770SStanislav Sedov p[1] = (bom ) & 0xff; 351*ae771770SStanislav Sedov } else { 352*ae771770SStanislav Sedov p[1] = (bom ) & 0xff; 353*ae771770SStanislav Sedov p[0] = (bom >> 8) & 0xff; 354*ae771770SStanislav Sedov } 355*ae771770SStanislav Sedov len -= 2; 356*ae771770SStanislav Sedov } 357*ae771770SStanislav Sedov 358*ae771770SStanislav Sedov while (in_len) { 359*ae771770SStanislav Sedov /** If the output wont fit into out_len, WIND_ERR_OVERRUN is returned */ 360*ae771770SStanislav Sedov if (len < 2) 361*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 362*ae771770SStanislav Sedov if ((*flags) & WIND_RW_LE) { 363*ae771770SStanislav Sedov p[0] = (in[0] >> 8) & 0xff; 364*ae771770SStanislav Sedov p[1] = (in[0] ) & 0xff; 365*ae771770SStanislav Sedov } else { 366*ae771770SStanislav Sedov p[1] = (in[0] ) & 0xff; 367*ae771770SStanislav Sedov p[0] = (in[0] >> 8) & 0xff; 368*ae771770SStanislav Sedov } 369*ae771770SStanislav Sedov len -= 2; 370*ae771770SStanislav Sedov in_len--; 371*ae771770SStanislav Sedov p += 2; 372*ae771770SStanislav Sedov in++; 373*ae771770SStanislav Sedov } 374*ae771770SStanislav Sedov *out_len -= len; 375*ae771770SStanislav Sedov return 0; 376*ae771770SStanislav Sedov } 377*ae771770SStanislav Sedov 378*ae771770SStanislav Sedov 379*ae771770SStanislav Sedov /** 380*ae771770SStanislav Sedov * Convert an UTF-8 string to an UCS2 string. 381*ae771770SStanislav Sedov * 382*ae771770SStanislav Sedov * @param in an UTF-8 string to convert. 383*ae771770SStanislav Sedov * @param out the resulting UCS2 strint, must be at least 384*ae771770SStanislav Sedov * wind_utf8ucs2_length() long. If out is NULL, the function will 385*ae771770SStanislav Sedov * calculate the needed space for the out variable (just like 386*ae771770SStanislav Sedov * wind_utf8ucs2_length()). 387*ae771770SStanislav Sedov * @param out_len before processing out_len should be the length of 388*ae771770SStanislav Sedov * the out variable, after processing it will be the length of the out 389*ae771770SStanislav Sedov * string. 390*ae771770SStanislav Sedov * 391*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 392*ae771770SStanislav Sedov * @ingroup wind 393*ae771770SStanislav Sedov */ 394*ae771770SStanislav Sedov 395*ae771770SStanislav Sedov int 396*ae771770SStanislav Sedov wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len) 397*ae771770SStanislav Sedov { 398*ae771770SStanislav Sedov const unsigned char *p; 399*ae771770SStanislav Sedov size_t o = 0; 400*ae771770SStanislav Sedov int ret; 401*ae771770SStanislav Sedov 402*ae771770SStanislav Sedov for (p = (const unsigned char *)in; *p != '\0'; ++p) { 403*ae771770SStanislav Sedov uint32_t u; 404*ae771770SStanislav Sedov 405*ae771770SStanislav Sedov ret = utf8toutf32(&p, &u); 406*ae771770SStanislav Sedov if (ret) 407*ae771770SStanislav Sedov return ret; 408*ae771770SStanislav Sedov 409*ae771770SStanislav Sedov if (u & 0xffff0000) 410*ae771770SStanislav Sedov return WIND_ERR_NOT_UTF16; 411*ae771770SStanislav Sedov 412*ae771770SStanislav Sedov if (out) { 413*ae771770SStanislav Sedov if (o >= *out_len) 414*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 415*ae771770SStanislav Sedov out[o] = u; 416*ae771770SStanislav Sedov } 417*ae771770SStanislav Sedov o++; 418*ae771770SStanislav Sedov } 419*ae771770SStanislav Sedov *out_len = o; 420*ae771770SStanislav Sedov return 0; 421*ae771770SStanislav Sedov } 422*ae771770SStanislav Sedov 423*ae771770SStanislav Sedov /** 424*ae771770SStanislav Sedov * Calculate the length of from converting a UTF-8 string to a UCS2 425*ae771770SStanislav Sedov * string. 426*ae771770SStanislav Sedov * 427*ae771770SStanislav Sedov * @param in an UTF-8 string to convert. 428*ae771770SStanislav Sedov * @param out_len the length of the resulting UCS4 string. 429*ae771770SStanislav Sedov * 430*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 431*ae771770SStanislav Sedov * @ingroup wind 432*ae771770SStanislav Sedov */ 433*ae771770SStanislav Sedov 434*ae771770SStanislav Sedov int 435*ae771770SStanislav Sedov wind_utf8ucs2_length(const char *in, size_t *out_len) 436*ae771770SStanislav Sedov { 437*ae771770SStanislav Sedov return wind_utf8ucs2(in, NULL, out_len); 438*ae771770SStanislav Sedov } 439*ae771770SStanislav Sedov 440*ae771770SStanislav Sedov /** 441*ae771770SStanislav Sedov * Convert an UCS2 string to a UTF-8 string. 442*ae771770SStanislav Sedov * 443*ae771770SStanislav Sedov * @param in an UCS2 string to convert. 444*ae771770SStanislav Sedov * @param in_len the length of the in UCS2 string. 445*ae771770SStanislav Sedov * @param out the resulting UTF-8 strint, must be at least 446*ae771770SStanislav Sedov * wind_ucs2utf8_length() long. If out is NULL, the function will 447*ae771770SStanislav Sedov * calculate the needed space for the out variable (just like 448*ae771770SStanislav Sedov * wind_ucs2utf8_length()). 449*ae771770SStanislav Sedov * @param out_len before processing out_len should be the length of 450*ae771770SStanislav Sedov * the out variable, after processing it will be the length of the out 451*ae771770SStanislav Sedov * string. 452*ae771770SStanislav Sedov * 453*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 454*ae771770SStanislav Sedov * @ingroup wind 455*ae771770SStanislav Sedov */ 456*ae771770SStanislav Sedov 457*ae771770SStanislav Sedov int 458*ae771770SStanislav Sedov wind_ucs2utf8(const uint16_t *in, size_t in_len, char *out, size_t *out_len) 459*ae771770SStanislav Sedov { 460*ae771770SStanislav Sedov uint16_t ch; 461*ae771770SStanislav Sedov size_t i, len, o; 462*ae771770SStanislav Sedov 463*ae771770SStanislav Sedov for (o = 0, i = 0; i < in_len; i++) { 464*ae771770SStanislav Sedov ch = in[i]; 465*ae771770SStanislav Sedov 466*ae771770SStanislav Sedov if (ch < 0x80) { 467*ae771770SStanislav Sedov len = 1; 468*ae771770SStanislav Sedov } else if (ch < 0x800) { 469*ae771770SStanislav Sedov len = 2; 470*ae771770SStanislav Sedov } else 471*ae771770SStanislav Sedov len = 3; 472*ae771770SStanislav Sedov 473*ae771770SStanislav Sedov o += len; 474*ae771770SStanislav Sedov 475*ae771770SStanislav Sedov if (out) { 476*ae771770SStanislav Sedov if (o >= *out_len) 477*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 478*ae771770SStanislav Sedov 479*ae771770SStanislav Sedov switch(len) { 480*ae771770SStanislav Sedov case 3: 481*ae771770SStanislav Sedov out[2] = (ch | 0x80) & 0xbf; 482*ae771770SStanislav Sedov ch = ch << 6; 483*ae771770SStanislav Sedov case 2: 484*ae771770SStanislav Sedov out[1] = (ch | 0x80) & 0xbf; 485*ae771770SStanislav Sedov ch = ch << 6; 486*ae771770SStanislav Sedov case 1: 487*ae771770SStanislav Sedov out[0] = ch | first_char[len - 1]; 488*ae771770SStanislav Sedov } 489*ae771770SStanislav Sedov out += len; 490*ae771770SStanislav Sedov } 491*ae771770SStanislav Sedov } 492*ae771770SStanislav Sedov if (out) { 493*ae771770SStanislav Sedov if (o >= *out_len) 494*ae771770SStanislav Sedov return WIND_ERR_OVERRUN; 495*ae771770SStanislav Sedov *out = '\0'; 496*ae771770SStanislav Sedov } 497*ae771770SStanislav Sedov *out_len = o; 498*ae771770SStanislav Sedov return 0; 499*ae771770SStanislav Sedov } 500*ae771770SStanislav Sedov 501*ae771770SStanislav Sedov /** 502*ae771770SStanislav Sedov * Calculate the length of from converting a UCS2 string to an UTF-8 string. 503*ae771770SStanislav Sedov * 504*ae771770SStanislav Sedov * @param in an UCS2 string to convert. 505*ae771770SStanislav Sedov * @param in_len an UCS2 string length to convert. 506*ae771770SStanislav Sedov * @param out_len the length of the resulting UTF-8 string. 507*ae771770SStanislav Sedov * 508*ae771770SStanislav Sedov * @return returns 0 on success, an wind error code otherwise 509*ae771770SStanislav Sedov * @ingroup wind 510*ae771770SStanislav Sedov */ 511*ae771770SStanislav Sedov 512*ae771770SStanislav Sedov int 513*ae771770SStanislav Sedov wind_ucs2utf8_length(const uint16_t *in, size_t in_len, size_t *out_len) 514*ae771770SStanislav Sedov { 515*ae771770SStanislav Sedov return wind_ucs2utf8(in, in_len, NULL, out_len); 516*ae771770SStanislav Sedov } 517