1 /*- 2 * Copyright (c) 2010 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/types.h> 31 #include <errno.h> 32 #ifdef _STANDALONE 33 #include <stand.h> 34 #else 35 #include <stddef.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <unistd.h> 39 #include <sys/efi.h> 40 #include <machine/efi.h> 41 #endif 42 43 #include "efichar.h" 44 45 int 46 ucs2len(const efi_char *str) 47 { 48 int i; 49 50 i = 0; 51 while (*str++) 52 i++; 53 return (i); 54 } 55 56 /* 57 * If nm were converted to utf8, what what would strlen 58 * return on the resulting string? 59 */ 60 static size_t 61 utf8_len_of_ucs2(const efi_char *nm) 62 { 63 size_t len; 64 efi_char c; 65 66 len = 0; 67 while (*nm) { 68 c = *nm++; 69 if (c > 0x7ff) 70 len += 3; 71 else if (c > 0x7f) 72 len += 2; 73 else 74 len++; 75 } 76 77 return (len); 78 } 79 80 int 81 ucs2_to_utf8(const efi_char *nm, char **name) 82 { 83 size_t len, sz; 84 efi_char c; 85 char *cp; 86 int freeit = *name == NULL; 87 88 sz = utf8_len_of_ucs2(nm) + 1; 89 len = 0; 90 if (*name != NULL) 91 cp = *name; 92 else 93 cp = *name = malloc(sz); 94 if (*name == NULL) 95 return (ENOMEM); 96 97 while (*nm) { 98 c = *nm++; 99 if (c > 0x7ff) { 100 if (len++ < sz) 101 *cp++ = (char)(0xE0 | (c >> 12)); 102 if (len++ < sz) 103 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f)); 104 if (len++ < sz) 105 *cp++ = (char)(0x80 | (c & 0x3f)); 106 } else if (c > 0x7f) { 107 if (len++ < sz) 108 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f)); 109 if (len++ < sz) 110 *cp++ = (char)(0x80 | (c & 0x3f)); 111 } else { 112 if (len++ < sz) 113 *cp++ = (char)(c & 0x7f); 114 } 115 } 116 117 if (len >= sz) { 118 /* Absent bugs, we'll never return EOVERFLOW */ 119 if (freeit) { 120 free(*name); 121 *name = NULL; 122 } 123 return (EOVERFLOW); 124 } 125 *cp++ = '\0'; 126 127 return (0); 128 } 129 130 int 131 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len) 132 { 133 efi_char *nm; 134 size_t sz; 135 uint32_t ucs4; 136 int c, bytes; 137 int freeit = *nmp == NULL; 138 139 sz = strlen(name) * 2 + 2; 140 if (*nmp == NULL) 141 *nmp = malloc(sz); 142 if (*nmp == NULL) 143 return (ENOMEM); 144 nm = *nmp; 145 *len = sz; 146 147 ucs4 = 0; 148 bytes = 0; 149 while (sz > 1 && *name != '\0') { 150 c = *name++; 151 /* 152 * Conditionalize on the two major character types: 153 * initial and followup characters. 154 */ 155 if ((c & 0xc0) != 0x80) { 156 /* Initial characters. */ 157 if (bytes != 0) 158 goto ilseq; 159 if ((c & 0xf8) == 0xf0) { 160 ucs4 = c & 0x07; 161 bytes = 3; 162 } else if ((c & 0xf0) == 0xe0) { 163 ucs4 = c & 0x0f; 164 bytes = 2; 165 } else if ((c & 0xe0) == 0xc0) { 166 ucs4 = c & 0x1f; 167 bytes = 1; 168 } else { 169 ucs4 = c & 0x7f; 170 bytes = 0; 171 } 172 } else { 173 /* Followup characters. */ 174 if (bytes > 0) { 175 ucs4 = (ucs4 << 6) + (c & 0x3f); 176 bytes--; 177 } else if (bytes == 0) 178 goto ilseq; 179 } 180 if (bytes == 0) { 181 if (ucs4 > 0xffff) 182 goto ilseq; 183 *nm++ = (efi_char)ucs4; 184 sz -= 2; 185 } 186 } 187 if (sz < 2) { 188 if (freeit) { 189 free(nm); 190 *nmp = NULL; 191 } 192 return (EDOOFUS); 193 } 194 sz -= 2; 195 *nm = 0; 196 *len -= sz; 197 return (0); 198 ilseq: 199 if (freeit) { 200 free(nm); 201 *nmp = NULL; 202 } 203 return (EILSEQ); 204 } 205