1 /*- 2 * Copyright (c) 2010 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/types.h> 29 #include <errno.h> 30 #ifdef _STANDALONE 31 #include <stand.h> 32 #else 33 #include <stddef.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 #include <sys/efi.h> 38 #include <machine/efi.h> 39 #endif 40 41 #include "efichar.h" 42 43 int 44 ucs2len(const efi_char *str) 45 { 46 int i; 47 48 i = 0; 49 while (*str++) 50 i++; 51 return (i); 52 } 53 54 /* 55 * If nm were converted to utf8, what what would strlen 56 * return on the resulting string? 57 */ 58 static size_t 59 utf8_len_of_ucs2(const efi_char *nm) 60 { 61 size_t len; 62 efi_char c; 63 64 len = 0; 65 while (*nm) { 66 c = *nm++; 67 if (c > 0x7ff) 68 len += 3; 69 else if (c > 0x7f) 70 len += 2; 71 else 72 len++; 73 } 74 75 return (len); 76 } 77 78 int 79 ucs2_to_utf8(const efi_char *nm, char **name) 80 { 81 size_t len, sz; 82 efi_char c; 83 char *cp; 84 int freeit = *name == NULL; 85 86 sz = utf8_len_of_ucs2(nm) + 1; 87 len = 0; 88 if (*name != NULL) 89 cp = *name; 90 else 91 cp = *name = malloc(sz); 92 if (*name == NULL) 93 return (ENOMEM); 94 95 while (*nm) { 96 c = *nm++; 97 if (c > 0x7ff) { 98 if (len++ < sz) 99 *cp++ = (char)(0xE0 | (c >> 12)); 100 if (len++ < sz) 101 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f)); 102 if (len++ < sz) 103 *cp++ = (char)(0x80 | (c & 0x3f)); 104 } else if (c > 0x7f) { 105 if (len++ < sz) 106 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f)); 107 if (len++ < sz) 108 *cp++ = (char)(0x80 | (c & 0x3f)); 109 } else { 110 if (len++ < sz) 111 *cp++ = (char)(c & 0x7f); 112 } 113 } 114 115 if (len >= sz) { 116 /* Absent bugs, we'll never return EOVERFLOW */ 117 if (freeit) { 118 free(*name); 119 *name = NULL; 120 } 121 return (EOVERFLOW); 122 } 123 *cp++ = '\0'; 124 125 return (0); 126 } 127 128 int 129 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len) 130 { 131 efi_char *nm; 132 size_t sz; 133 uint32_t ucs4; 134 int c, bytes; 135 int freeit = *nmp == NULL; 136 137 sz = strlen(name) * 2 + 2; 138 if (*nmp == NULL) 139 *nmp = malloc(sz); 140 if (*nmp == NULL) 141 return (ENOMEM); 142 nm = *nmp; 143 *len = sz; 144 145 ucs4 = 0; 146 bytes = 0; 147 while (sz > 1 && *name != '\0') { 148 c = *name++; 149 /* 150 * Conditionalize on the two major character types: 151 * initial and followup characters. 152 */ 153 if ((c & 0xc0) != 0x80) { 154 /* Initial characters. */ 155 if (bytes != 0) 156 goto ilseq; 157 if ((c & 0xf8) == 0xf0) { 158 ucs4 = c & 0x07; 159 bytes = 3; 160 } else if ((c & 0xf0) == 0xe0) { 161 ucs4 = c & 0x0f; 162 bytes = 2; 163 } else if ((c & 0xe0) == 0xc0) { 164 ucs4 = c & 0x1f; 165 bytes = 1; 166 } else { 167 ucs4 = c & 0x7f; 168 bytes = 0; 169 } 170 } else { 171 /* Followup characters. */ 172 if (bytes > 0) { 173 ucs4 = (ucs4 << 6) + (c & 0x3f); 174 bytes--; 175 } else if (bytes == 0) 176 goto ilseq; 177 } 178 if (bytes == 0) { 179 if (ucs4 > 0xffff) 180 goto ilseq; 181 *nm++ = (efi_char)ucs4; 182 sz -= 2; 183 } 184 } 185 if (sz < 2) { 186 if (freeit) { 187 free(nm); 188 *nmp = NULL; 189 } 190 return (EDOOFUS); 191 } 192 sz -= 2; 193 *nm = 0; 194 *len -= sz; 195 return (0); 196 ilseq: 197 if (freeit) { 198 free(nm); 199 *nmp = NULL; 200 } 201 return (EILSEQ); 202 } 203