1 /*-
2 * Copyright (c) 2010 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/types.h>
28 #ifndef _KERNEL
29 #include <errno.h>
30 #endif
31 #ifdef _STANDALONE
32 #include <stand.h>
33 #else
34 #ifdef _KERNEL
35 #include <sys/malloc.h>
36 #include <sys/systm.h>
37 #else
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #endif
43 #include <sys/efi.h>
44 #include <machine/efi.h>
45 #endif
46
47 #include "efichar.h"
48
49 int
ucs2len(const efi_char * str)50 ucs2len(const efi_char *str)
51 {
52 int i;
53
54 i = 0;
55 while (*str++)
56 i++;
57 return (i);
58 }
59
60 /*
61 * If nm were converted to utf8, what what would strlen
62 * return on the resulting string?
63 */
64 static size_t
utf8_len_of_ucs2(const efi_char * nm)65 utf8_len_of_ucs2(const efi_char *nm)
66 {
67 size_t len;
68 efi_char c;
69
70 len = 0;
71 while (*nm) {
72 c = *nm++;
73 if (c > 0x7ff)
74 len += 3;
75 else if (c > 0x7f)
76 len += 2;
77 else
78 len++;
79 }
80
81 return (len);
82 }
83
84 int
ucs2_to_utf8(const efi_char * nm,char ** name)85 ucs2_to_utf8(const efi_char *nm, char **name)
86 {
87 size_t len, sz;
88 efi_char c;
89 char *cp;
90 int freeit = *name == NULL;
91
92 sz = utf8_len_of_ucs2(nm) + 1;
93 len = 0;
94 if (*name != NULL)
95 cp = *name;
96 else
97 cp = *name = EFICHAR_MALLOC(sz);
98 if (*name == NULL)
99 return (ENOMEM);
100
101 while (*nm) {
102 c = *nm++;
103 if (c > 0x7ff) {
104 if (len++ < sz)
105 *cp++ = (char)(0xE0 | (c >> 12));
106 if (len++ < sz)
107 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
108 if (len++ < sz)
109 *cp++ = (char)(0x80 | (c & 0x3f));
110 } else if (c > 0x7f) {
111 if (len++ < sz)
112 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
113 if (len++ < sz)
114 *cp++ = (char)(0x80 | (c & 0x3f));
115 } else {
116 if (len++ < sz)
117 *cp++ = (char)(c & 0x7f);
118 }
119 }
120
121 if (len >= sz) {
122 /* Absent bugs, we'll never return EOVERFLOW */
123 if (freeit) {
124 EFICHAR_FREE(*name);
125 *name = NULL;
126 }
127 return (EOVERFLOW);
128 }
129 *cp++ = '\0';
130
131 return (0);
132 }
133
134 int
utf8_to_ucs2(const char * name,efi_char ** nmp,size_t * len)135 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len)
136 {
137 efi_char *nm;
138 size_t sz;
139 uint32_t ucs4;
140 int c, bytes;
141 int freeit = *nmp == NULL;
142
143 sz = strlen(name) * 2 + 2;
144 if (*nmp == NULL)
145 *nmp = EFICHAR_MALLOC(sz);
146 if (*nmp == NULL)
147 return (ENOMEM);
148 nm = *nmp;
149 *len = sz;
150
151 ucs4 = 0;
152 bytes = 0;
153 while (sz > 1 && *name != '\0') {
154 c = *name++;
155 /*
156 * Conditionalize on the two major character types:
157 * initial and followup characters.
158 */
159 if ((c & 0xc0) != 0x80) {
160 /* Initial characters. */
161 if (bytes != 0)
162 goto ilseq;
163 if ((c & 0xf8) == 0xf0) {
164 ucs4 = c & 0x07;
165 bytes = 3;
166 } else if ((c & 0xf0) == 0xe0) {
167 ucs4 = c & 0x0f;
168 bytes = 2;
169 } else if ((c & 0xe0) == 0xc0) {
170 ucs4 = c & 0x1f;
171 bytes = 1;
172 } else {
173 ucs4 = c & 0x7f;
174 bytes = 0;
175 }
176 } else {
177 /* Followup characters. */
178 if (bytes > 0) {
179 ucs4 = (ucs4 << 6) + (c & 0x3f);
180 bytes--;
181 } else if (bytes == 0)
182 goto ilseq;
183 }
184 if (bytes == 0) {
185 if (ucs4 > 0xffff)
186 goto ilseq;
187 *nm++ = (efi_char)ucs4;
188 sz -= 2;
189 }
190 }
191 if (sz < 2) {
192 if (freeit) {
193 EFICHAR_FREE(nm);
194 *nmp = NULL;
195 }
196 return (EDOOFUS);
197 }
198 sz -= 2;
199 *nm = 0;
200 *len -= sz;
201 return (0);
202 ilseq:
203 if (freeit) {
204 EFICHAR_FREE(nm);
205 *nmp = NULL;
206 }
207 return (EILSEQ);
208 }
209