1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Unicode conversions (yet more) 29 */ 30 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <errno.h> 35 #include <iconv.h> 36 #include <libintl.h> 37 38 #include <sys/u8_textprep.h> 39 40 #include <netsmb/smb_lib.h> 41 #include "charsets.h" 42 43 44 /* 45 * Number of unicode symbols in the string, 46 * not including the 2-byte null terminator. 47 * (multiply by two for storage size) 48 */ 49 size_t 50 unicode_strlen(const uint16_t *us) 51 { 52 size_t len = 0; 53 while (*us++) 54 len++; 55 return (len); 56 } 57 58 static char *convert_ucs2xx_to_utf8(iconv_t, const uint16_t *); 59 60 /* 61 * Convert (native) Unicode string to UTF-8. 62 * Returns allocated memory. 63 */ 64 char * 65 convert_unicode_to_utf8(uint16_t *us) 66 { 67 static iconv_t cd1 = (iconv_t)-1; 68 69 /* Get conversion descriptor (to, from) */ 70 if (cd1 == (iconv_t)-1) 71 cd1 = iconv_open("UTF-8", "UCS-2"); 72 73 return (convert_ucs2xx_to_utf8(cd1, us)); 74 } 75 76 /* 77 * Convert little-endian Unicode string to UTF-8. 78 * Returns allocated memory. 79 */ 80 char * 81 convert_leunicode_to_utf8(unsigned short *us) 82 { 83 static iconv_t cd2 = (iconv_t)-1; 84 85 /* Get conversion descriptor (to, from) */ 86 if (cd2 == (iconv_t)-1) 87 cd2 = iconv_open("UTF-8", "UCS-2LE"); 88 89 return (convert_ucs2xx_to_utf8(cd2, us)); 90 } 91 92 static char * 93 convert_ucs2xx_to_utf8(iconv_t cd, const uint16_t *us) 94 { 95 char *obuf, *optr; 96 const char *iptr; 97 size_t ileft, obsize, oleft, ret; 98 99 if (cd == (iconv_t)-1) { 100 smb_error(dgettext(TEXT_DOMAIN, 101 "iconv_open(UTF-8/UCS-2)"), -1); 102 return (NULL); 103 } 104 105 iptr = (const char *)us; 106 ileft = unicode_strlen(us); 107 ileft *= 2; /* now bytes */ 108 109 /* Worst-case output size is 2x input size. */ 110 oleft = ileft * 2; 111 obsize = oleft + 2; /* room for null */ 112 obuf = malloc(obsize); 113 if (!obuf) 114 return (NULL); 115 optr = obuf; 116 117 ret = iconv(cd, &iptr, &ileft, &optr, &oleft); 118 *optr = '\0'; 119 if (ret == (size_t)-1) { 120 smb_error(dgettext(TEXT_DOMAIN, 121 "iconv(%s) failed"), errno, obuf); 122 } 123 if (ileft) { 124 smb_error(dgettext(TEXT_DOMAIN, 125 "iconv(%s) failed"), -1, obuf); 126 /* 127 * XXX: What's better? return NULL? 128 * The truncated string? << for now 129 */ 130 } 131 132 return (obuf); 133 } 134 135 static uint16_t *convert_utf8_to_ucs2xx(iconv_t, const char *); 136 137 /* 138 * Convert UTF-8 string to Unicode. 139 * Returns allocated memory. 140 */ 141 uint16_t * 142 convert_utf8_to_unicode(const char *utf8_string) 143 { 144 static iconv_t cd3 = (iconv_t)-1; 145 146 /* Get conversion descriptor (to, from) */ 147 if (cd3 == (iconv_t)-1) 148 cd3 = iconv_open("UCS-2", "UTF-8"); 149 return (convert_utf8_to_ucs2xx(cd3, utf8_string)); 150 } 151 152 /* 153 * Convert UTF-8 string to little-endian Unicode. 154 * Returns allocated memory. 155 */ 156 uint16_t * 157 convert_utf8_to_leunicode(const char *utf8_string) 158 { 159 static iconv_t cd4 = (iconv_t)-1; 160 161 /* Get conversion descriptor (to, from) */ 162 if (cd4 == (iconv_t)-1) 163 cd4 = iconv_open("UCS-2LE", "UTF-8"); 164 return (convert_utf8_to_ucs2xx(cd4, utf8_string)); 165 } 166 167 static uint16_t * 168 convert_utf8_to_ucs2xx(iconv_t cd, const char *utf8_string) 169 { 170 uint16_t *obuf, *optr; 171 const char *iptr; 172 size_t ileft, obsize, oleft, ret; 173 174 if (cd == (iconv_t)-1) { 175 smb_error(dgettext(TEXT_DOMAIN, 176 "iconv_open(UCS-2/UTF-8)"), -1); 177 return (NULL); 178 } 179 180 iptr = utf8_string; 181 ileft = strlen(iptr); 182 183 /* Worst-case output size is 2x input size. */ 184 oleft = ileft * 2; 185 obsize = oleft + 2; /* room for null */ 186 obuf = malloc(obsize); 187 if (!obuf) 188 return (NULL); 189 optr = obuf; 190 191 ret = iconv(cd, &iptr, &ileft, (char **)&optr, &oleft); 192 *optr = '\0'; 193 if (ret == (size_t)-1) { 194 smb_error(dgettext(TEXT_DOMAIN, 195 "iconv(%s) failed"), errno, utf8_string); 196 } 197 if (ileft) { 198 smb_error(dgettext(TEXT_DOMAIN, 199 "iconv(%s) failed"), -1, utf8_string); 200 /* 201 * XXX: What's better? return NULL? 202 * The truncated string? << for now 203 */ 204 } 205 206 return (obuf); 207 } 208