1 /*
2 * Copyright (c) 2010 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28
29 #include <sys/types.h>
30 #include <errno.h>
31 #include <stand.h>
32 #include <efichar.h>
33
34 int
ucs2len(const CHAR16 * str)35 ucs2len(const CHAR16 *str)
36 {
37 int i;
38
39 i = 0;
40 while (*str++)
41 i++;
42 return (i);
43 }
44
45 /*
46 * If nm were converted to utf8, what what would strlen
47 * return on the resulting string?
48 */
49 static size_t
utf8_len_of_ucs2(const CHAR16 * nm)50 utf8_len_of_ucs2(const CHAR16 *nm)
51 {
52 size_t len;
53 CHAR16 c;
54
55 len = 0;
56 while (*nm) {
57 c = *nm++;
58 if (c > 0x7ff)
59 len += 3;
60 else if (c > 0x7f)
61 len += 2;
62 else
63 len++;
64 }
65
66 return (len);
67 }
68
69 int
ucs2_to_utf8(const CHAR16 * nm,char ** name)70 ucs2_to_utf8(const CHAR16 *nm, char **name)
71 {
72 size_t len, sz;
73 CHAR16 c;
74 char *cp;
75 int freeit = *name == NULL;
76
77 sz = utf8_len_of_ucs2(nm) + 1;
78 len = 0;
79 if (*name != NULL)
80 cp = *name;
81 else
82 cp = *name = malloc(sz);
83 if (*name == NULL)
84 return (ENOMEM);
85
86 while (*nm) {
87 c = *nm++;
88 if (c > 0x7ff) {
89 if (len++ < sz)
90 *cp++ = (char)(0xE0 | (c >> 12));
91 if (len++ < sz)
92 *cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
93 if (len++ < sz)
94 *cp++ = (char)(0x80 | (c & 0x3f));
95 } else if (c > 0x7f) {
96 if (len++ < sz)
97 *cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
98 if (len++ < sz)
99 *cp++ = (char)(0x80 | (c & 0x3f));
100 } else {
101 if (len++ < sz)
102 *cp++ = (char)(c & 0x7f);
103 }
104 }
105
106 if (len >= sz) {
107 /* Absent bugs, we'll never return EOVERFLOW */
108 if (freeit) {
109 free(*name);
110 *name = NULL;
111 }
112 return (EOVERFLOW);
113 }
114 *cp++ = '\0';
115
116 return (0);
117 }
118
119 int
utf8_to_ucs2(const char * name,CHAR16 ** nmp,size_t * len)120 utf8_to_ucs2(const char *name, CHAR16 **nmp, size_t *len)
121 {
122 CHAR16 *nm;
123 size_t sz;
124 uint32_t ucs4;
125 int c, bytes;
126 int freeit = *nmp == NULL;
127
128 sz = strlen(name) * 2 + 2;
129 if (*nmp == NULL)
130 *nmp = malloc(sz);
131 if (*nmp == NULL)
132 return (ENOMEM);
133 nm = *nmp;
134 *len = sz;
135
136 ucs4 = 0;
137 bytes = 0;
138 while (sz > 1 && *name != '\0') {
139 c = *name++;
140 /*
141 * Conditionalize on the two major character types:
142 * initial and followup characters.
143 */
144 if ((c & 0xc0) != 0x80) {
145 /* Initial characters. */
146 if (bytes != 0)
147 goto ilseq;
148 if ((c & 0xf8) == 0xf0) {
149 ucs4 = c & 0x07;
150 bytes = 3;
151 } else if ((c & 0xf0) == 0xe0) {
152 ucs4 = c & 0x0f;
153 bytes = 2;
154 } else if ((c & 0xe0) == 0xc0) {
155 ucs4 = c & 0x1f;
156 bytes = 1;
157 } else {
158 ucs4 = c & 0x7f;
159 bytes = 0;
160 }
161 } else {
162 /* Followup characters. */
163 if (bytes > 0) {
164 ucs4 = (ucs4 << 6) + (c & 0x3f);
165 bytes--;
166 } else if (bytes == 0) {
167 goto ilseq;
168 }
169 }
170 if (bytes == 0) {
171 if (ucs4 > 0xffff)
172 goto ilseq;
173 *nm++ = (CHAR16)ucs4;
174 sz -= 2;
175 }
176 }
177 if (sz < 2) {
178 if (freeit) {
179 free(nm);
180 *nmp = NULL;
181 }
182 return (EDOOFUS);
183 }
184 sz -= 2;
185 *nm = 0;
186 *len -= sz;
187 return (0);
188 ilseq:
189 if (freeit) {
190 free(nm);
191 *nmp = NULL;
192 }
193 return (EILSEQ);
194 }
195