xref: /freebsd/stand/efi/libefi/efichar.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/types.h>
28 #ifndef _KERNEL
29 #include <errno.h>
30 #endif
31 #ifdef _STANDALONE
32 #include <stand.h>
33 #else
34 #ifdef _KERNEL
35 #include <sys/malloc.h>
36 #include <sys/systm.h>
37 #else
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #endif
43 #include <sys/efi.h>
44 #include <machine/efi.h>
45 #endif
46 
47 #include "efichar.h"
48 
49 int
50 ucs2len(const efi_char *str)
51 {
52 	int i;
53 
54 	i = 0;
55 	while (*str++)
56 		i++;
57 	return (i);
58 }
59 
60 /*
61  * If nm were converted to utf8, what what would strlen
62  * return on the resulting string?
63  */
64 static size_t
65 utf8_len_of_ucs2(const efi_char *nm)
66 {
67 	size_t len;
68 	efi_char c;
69 
70 	len = 0;
71 	while (*nm) {
72 		c = *nm++;
73 		if (c > 0x7ff)
74 			len += 3;
75 		else if (c > 0x7f)
76 			len += 2;
77 		else
78 			len++;
79 	}
80 
81 	return (len);
82 }
83 
84 int
85 ucs2_to_utf8(const efi_char *nm, char **name)
86 {
87 	size_t len, sz;
88 	efi_char c;
89 	char *cp;
90 	int freeit = *name == NULL;
91 
92 	sz = utf8_len_of_ucs2(nm) + 1;
93 	len = 0;
94 	if (*name != NULL)
95 		cp = *name;
96 	else
97 		cp = *name = EFICHAR_MALLOC(sz);
98 	if (*name == NULL)
99 		return (ENOMEM);
100 
101 	while (*nm) {
102 		c = *nm++;
103 		if (c > 0x7ff) {
104 			if (len++ < sz)
105 				*cp++ = (char)(0xE0 | (c >> 12));
106 			if (len++ < sz)
107 				*cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
108 			if (len++ < sz)
109 				*cp++ = (char)(0x80 | (c & 0x3f));
110 		} else if (c > 0x7f) {
111 			if (len++ < sz)
112 				*cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
113 			if (len++ < sz)
114 				*cp++ = (char)(0x80 | (c & 0x3f));
115 		} else {
116 			if (len++ < sz)
117 				*cp++ = (char)(c & 0x7f);
118 		}
119 	}
120 
121 	if (len >= sz) {
122 		/* Absent bugs, we'll never return EOVERFLOW */
123 		if (freeit) {
124 			EFICHAR_FREE(*name);
125 			*name = NULL;
126 		}
127 		return (EOVERFLOW);
128 	}
129 	*cp++ = '\0';
130 
131 	return (0);
132 }
133 
134 int
135 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len)
136 {
137 	efi_char *nm;
138 	size_t sz;
139 	uint32_t ucs4;
140 	int c, bytes;
141 	int freeit = *nmp == NULL;
142 
143 	sz = strlen(name) * 2 + 2;
144 	if (*nmp == NULL)
145 		*nmp = EFICHAR_MALLOC(sz);
146 	if (*nmp == NULL)
147 		return (ENOMEM);
148 	nm = *nmp;
149 	*len = sz;
150 
151 	ucs4 = 0;
152 	bytes = 0;
153 	while (sz > 1 && *name != '\0') {
154 		c = *name++;
155 		/*
156 		 * Conditionalize on the two major character types:
157 		 * initial and followup characters.
158 		 */
159 		if ((c & 0xc0) != 0x80) {
160 			/* Initial characters. */
161 			if (bytes != 0)
162 				goto ilseq;
163 			if ((c & 0xf8) == 0xf0) {
164 				ucs4 = c & 0x07;
165 				bytes = 3;
166 			} else if ((c & 0xf0) == 0xe0) {
167 				ucs4 = c & 0x0f;
168 				bytes = 2;
169 			} else if ((c & 0xe0) == 0xc0) {
170 				ucs4 = c & 0x1f;
171 				bytes = 1;
172 			} else {
173 				ucs4 = c & 0x7f;
174 				bytes = 0;
175 			}
176 		} else {
177 			/* Followup characters. */
178 			if (bytes > 0) {
179 				ucs4 = (ucs4 << 6) + (c & 0x3f);
180 				bytes--;
181 			} else if (bytes == 0)
182 				goto ilseq;
183 		}
184 		if (bytes == 0) {
185 			if (ucs4 > 0xffff)
186 				goto ilseq;
187 			*nm++ = (efi_char)ucs4;
188 			sz -= 2;
189 		}
190 	}
191 	if (sz < 2) {
192 		if (freeit) {
193 			EFICHAR_FREE(nm);
194 			*nmp = NULL;
195 		}
196 		return (EDOOFUS);
197 	}
198 	sz -= 2;
199 	*nm = 0;
200 	*len -= sz;
201 	return (0);
202 ilseq:
203 	if (freeit) {
204 		EFICHAR_FREE(nm);
205 		*nmp = NULL;
206 	}
207 	return (EILSEQ);
208 }
209