1 /* 2 * Copyright (c) 2010 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 29 #include <sys/types.h> 30 #include <errno.h> 31 #include <stand.h> 32 #include <efichar.h> 33 34 int 35 ucs2len(const CHAR16 *str) 36 { 37 int i; 38 39 i = 0; 40 while (*str++) 41 i++; 42 return (i); 43 } 44 45 /* 46 * If nm were converted to utf8, what what would strlen 47 * return on the resulting string? 48 */ 49 static size_t 50 utf8_len_of_ucs2(const CHAR16 *nm) 51 { 52 size_t len; 53 CHAR16 c; 54 55 len = 0; 56 while (*nm) { 57 c = *nm++; 58 if (c > 0x7ff) 59 len += 3; 60 else if (c > 0x7f) 61 len += 2; 62 else 63 len++; 64 } 65 66 return (len); 67 } 68 69 int 70 ucs2_to_utf8(const CHAR16 *nm, char **name) 71 { 72 size_t len, sz; 73 CHAR16 c; 74 char *cp; 75 int freeit = *name == NULL; 76 77 sz = utf8_len_of_ucs2(nm) + 1; 78 len = 0; 79 if (*name != NULL) 80 cp = *name; 81 else 82 cp = *name = malloc(sz); 83 if (*name == NULL) 84 return (ENOMEM); 85 86 while (*nm) { 87 c = *nm++; 88 if (c > 0x7ff) { 89 if (len++ < sz) 90 *cp++ = (char)(0xE0 | (c >> 12)); 91 if (len++ < sz) 92 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f)); 93 if (len++ < sz) 94 *cp++ = (char)(0x80 | (c & 0x3f)); 95 } else if (c > 0x7f) { 96 if (len++ < sz) 97 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f)); 98 if (len++ < sz) 99 *cp++ = (char)(0x80 | (c & 0x3f)); 100 } else { 101 if (len++ < sz) 102 *cp++ = (char)(c & 0x7f); 103 } 104 } 105 106 if (len >= sz) { 107 /* Absent bugs, we'll never return EOVERFLOW */ 108 if (freeit) { 109 free(*name); 110 *name = NULL; 111 } 112 return (EOVERFLOW); 113 } 114 *cp++ = '\0'; 115 116 return (0); 117 } 118 119 int 120 utf8_to_ucs2(const char *name, CHAR16 **nmp, size_t *len) 121 { 122 CHAR16 *nm; 123 size_t sz; 124 uint32_t ucs4; 125 int c, bytes; 126 int freeit = *nmp == NULL; 127 128 sz = strlen(name) * 2 + 2; 129 if (*nmp == NULL) 130 *nmp = malloc(sz); 131 if (*nmp == NULL) 132 return (ENOMEM); 133 nm = *nmp; 134 *len = sz; 135 136 ucs4 = 0; 137 bytes = 0; 138 while (sz > 1 && *name != '\0') { 139 c = *name++; 140 /* 141 * Conditionalize on the two major character types: 142 * initial and followup characters. 143 */ 144 if ((c & 0xc0) != 0x80) { 145 /* Initial characters. */ 146 if (bytes != 0) 147 goto ilseq; 148 if ((c & 0xf8) == 0xf0) { 149 ucs4 = c & 0x07; 150 bytes = 3; 151 } else if ((c & 0xf0) == 0xe0) { 152 ucs4 = c & 0x0f; 153 bytes = 2; 154 } else if ((c & 0xe0) == 0xc0) { 155 ucs4 = c & 0x1f; 156 bytes = 1; 157 } else { 158 ucs4 = c & 0x7f; 159 bytes = 0; 160 } 161 } else { 162 /* Followup characters. */ 163 if (bytes > 0) { 164 ucs4 = (ucs4 << 6) + (c & 0x3f); 165 bytes--; 166 } else if (bytes == 0) { 167 goto ilseq; 168 } 169 } 170 if (bytes == 0) { 171 if (ucs4 > 0xffff) 172 goto ilseq; 173 *nm++ = (CHAR16)ucs4; 174 sz -= 2; 175 } 176 } 177 if (sz < 2) { 178 if (freeit) { 179 free(nm); 180 *nmp = NULL; 181 } 182 return (EDOOFUS); 183 } 184 sz -= 2; 185 *nm = 0; 186 *len -= sz; 187 return (0); 188 ilseq: 189 if (freeit) { 190 free(nm); 191 *nmp = NULL; 192 } 193 return (EILSEQ); 194 } 195