1 /*- 2 * Copyright (c) 2010 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #include <errno.h> 29 #ifdef _STANDALONE 30 #include <stand.h> 31 #else 32 #include <stddef.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 #include <sys/efi.h> 37 #include <machine/efi.h> 38 #endif 39 40 #include "efichar.h" 41 42 int 43 ucs2len(const efi_char *str) 44 { 45 int i; 46 47 i = 0; 48 while (*str++) 49 i++; 50 return (i); 51 } 52 53 /* 54 * If nm were converted to utf8, what what would strlen 55 * return on the resulting string? 56 */ 57 static size_t 58 utf8_len_of_ucs2(const efi_char *nm) 59 { 60 size_t len; 61 efi_char c; 62 63 len = 0; 64 while (*nm) { 65 c = *nm++; 66 if (c > 0x7ff) 67 len += 3; 68 else if (c > 0x7f) 69 len += 2; 70 else 71 len++; 72 } 73 74 return (len); 75 } 76 77 int 78 ucs2_to_utf8(const efi_char *nm, char **name) 79 { 80 size_t len, sz; 81 efi_char c; 82 char *cp; 83 int freeit = *name == NULL; 84 85 sz = utf8_len_of_ucs2(nm) + 1; 86 len = 0; 87 if (*name != NULL) 88 cp = *name; 89 else 90 cp = *name = malloc(sz); 91 if (*name == NULL) 92 return (ENOMEM); 93 94 while (*nm) { 95 c = *nm++; 96 if (c > 0x7ff) { 97 if (len++ < sz) 98 *cp++ = (char)(0xE0 | (c >> 12)); 99 if (len++ < sz) 100 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f)); 101 if (len++ < sz) 102 *cp++ = (char)(0x80 | (c & 0x3f)); 103 } else if (c > 0x7f) { 104 if (len++ < sz) 105 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f)); 106 if (len++ < sz) 107 *cp++ = (char)(0x80 | (c & 0x3f)); 108 } else { 109 if (len++ < sz) 110 *cp++ = (char)(c & 0x7f); 111 } 112 } 113 114 if (len >= sz) { 115 /* Absent bugs, we'll never return EOVERFLOW */ 116 if (freeit) { 117 free(*name); 118 *name = NULL; 119 } 120 return (EOVERFLOW); 121 } 122 *cp++ = '\0'; 123 124 return (0); 125 } 126 127 int 128 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len) 129 { 130 efi_char *nm; 131 size_t sz; 132 uint32_t ucs4; 133 int c, bytes; 134 int freeit = *nmp == NULL; 135 136 sz = strlen(name) * 2 + 2; 137 if (*nmp == NULL) 138 *nmp = malloc(sz); 139 if (*nmp == NULL) 140 return (ENOMEM); 141 nm = *nmp; 142 *len = sz; 143 144 ucs4 = 0; 145 bytes = 0; 146 while (sz > 1 && *name != '\0') { 147 c = *name++; 148 /* 149 * Conditionalize on the two major character types: 150 * initial and followup characters. 151 */ 152 if ((c & 0xc0) != 0x80) { 153 /* Initial characters. */ 154 if (bytes != 0) 155 goto ilseq; 156 if ((c & 0xf8) == 0xf0) { 157 ucs4 = c & 0x07; 158 bytes = 3; 159 } else if ((c & 0xf0) == 0xe0) { 160 ucs4 = c & 0x0f; 161 bytes = 2; 162 } else if ((c & 0xe0) == 0xc0) { 163 ucs4 = c & 0x1f; 164 bytes = 1; 165 } else { 166 ucs4 = c & 0x7f; 167 bytes = 0; 168 } 169 } else { 170 /* Followup characters. */ 171 if (bytes > 0) { 172 ucs4 = (ucs4 << 6) + (c & 0x3f); 173 bytes--; 174 } else if (bytes == 0) 175 goto ilseq; 176 } 177 if (bytes == 0) { 178 if (ucs4 > 0xffff) 179 goto ilseq; 180 *nm++ = (efi_char)ucs4; 181 sz -= 2; 182 } 183 } 184 if (sz < 2) { 185 if (freeit) { 186 free(nm); 187 *nmp = NULL; 188 } 189 return (EDOOFUS); 190 } 191 sz -= 2; 192 *nm = 0; 193 *len -= sz; 194 return (0); 195 ilseq: 196 if (freeit) { 197 free(nm); 198 *nmp = NULL; 199 } 200 return (EILSEQ); 201 } 202