1 /*- 2 * Copyright (c) 2010 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #ifndef _KERNEL 29 #include <errno.h> 30 #endif 31 #ifdef _STANDALONE 32 #include <stand.h> 33 #else 34 #ifdef _KERNEL 35 #include <sys/malloc.h> 36 #include <sys/systm.h> 37 #else 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 #endif 43 #include <sys/efi.h> 44 #include <machine/efi.h> 45 #endif 46 47 #include "efichar.h" 48 49 int 50 ucs2len(const efi_char *str) 51 { 52 int i; 53 54 i = 0; 55 while (*str++) 56 i++; 57 return (i); 58 } 59 60 /* 61 * If nm were converted to utf8, what what would strlen 62 * return on the resulting string? 63 */ 64 static size_t 65 utf8_len_of_ucs2(const efi_char *nm) 66 { 67 size_t len; 68 efi_char c; 69 70 len = 0; 71 while (*nm) { 72 c = *nm++; 73 if (c > 0x7ff) 74 len += 3; 75 else if (c > 0x7f) 76 len += 2; 77 else 78 len++; 79 } 80 81 return (len); 82 } 83 84 int 85 ucs2_to_utf8(const efi_char *nm, char **name) 86 { 87 size_t len, sz; 88 efi_char c; 89 char *cp; 90 int freeit = *name == NULL; 91 92 sz = utf8_len_of_ucs2(nm) + 1; 93 len = 0; 94 if (*name != NULL) 95 cp = *name; 96 else 97 cp = *name = EFICHAR_MALLOC(sz); 98 if (*name == NULL) 99 return (ENOMEM); 100 101 while (*nm) { 102 c = *nm++; 103 if (c > 0x7ff) { 104 if (len++ < sz) 105 *cp++ = (char)(0xE0 | (c >> 12)); 106 if (len++ < sz) 107 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f)); 108 if (len++ < sz) 109 *cp++ = (char)(0x80 | (c & 0x3f)); 110 } else if (c > 0x7f) { 111 if (len++ < sz) 112 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f)); 113 if (len++ < sz) 114 *cp++ = (char)(0x80 | (c & 0x3f)); 115 } else { 116 if (len++ < sz) 117 *cp++ = (char)(c & 0x7f); 118 } 119 } 120 121 if (len >= sz) { 122 /* Absent bugs, we'll never return EOVERFLOW */ 123 if (freeit) { 124 EFICHAR_FREE(*name); 125 *name = NULL; 126 } 127 return (EOVERFLOW); 128 } 129 *cp++ = '\0'; 130 131 return (0); 132 } 133 134 int 135 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len) 136 { 137 efi_char *nm; 138 size_t sz; 139 uint32_t ucs4; 140 int c, bytes; 141 int freeit = *nmp == NULL; 142 143 sz = strlen(name) * 2 + 2; 144 if (*nmp == NULL) 145 *nmp = EFICHAR_MALLOC(sz); 146 if (*nmp == NULL) 147 return (ENOMEM); 148 nm = *nmp; 149 *len = sz; 150 151 ucs4 = 0; 152 bytes = 0; 153 while (sz > 1 && *name != '\0') { 154 c = *name++; 155 /* 156 * Conditionalize on the two major character types: 157 * initial and followup characters. 158 */ 159 if ((c & 0xc0) != 0x80) { 160 /* Initial characters. */ 161 if (bytes != 0) 162 goto ilseq; 163 if ((c & 0xf8) == 0xf0) { 164 ucs4 = c & 0x07; 165 bytes = 3; 166 } else if ((c & 0xf0) == 0xe0) { 167 ucs4 = c & 0x0f; 168 bytes = 2; 169 } else if ((c & 0xe0) == 0xc0) { 170 ucs4 = c & 0x1f; 171 bytes = 1; 172 } else { 173 ucs4 = c & 0x7f; 174 bytes = 0; 175 } 176 } else { 177 /* Followup characters. */ 178 if (bytes > 0) { 179 ucs4 = (ucs4 << 6) + (c & 0x3f); 180 bytes--; 181 } else if (bytes == 0) 182 goto ilseq; 183 } 184 if (bytes == 0) { 185 if (ucs4 > 0xffff) 186 goto ilseq; 187 *nm++ = (efi_char)ucs4; 188 sz -= 2; 189 } 190 } 191 if (sz < 2) { 192 if (freeit) { 193 EFICHAR_FREE(nm); 194 *nmp = NULL; 195 } 196 return (EDOOFUS); 197 } 198 sz -= 2; 199 *nm = 0; 200 *len -= sz; 201 return (0); 202 ilseq: 203 if (freeit) { 204 EFICHAR_FREE(nm); 205 *nmp = NULL; 206 } 207 return (EILSEQ); 208 } 209