xref: /freebsd/lib/libc/locale/collate.c (revision 70e0bbedef95258a4dadc996d641a9bebd3f107d)
1 /*-
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Copyright (c) 2011 The FreeBSD Foundation
12  * All rights reserved.
13  * Portions of this software were developed by David Chisnall
14  * under sponsorship from the FreeBSD Foundation.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include "namespace.h"
42 #include <arpa/inet.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <errno.h>
47 #include <unistd.h>
48 #include <sysexits.h>
49 #include "un-namespace.h"
50 
51 #include "collate.h"
52 #include "setlocale.h"
53 #include "ldpart.h"
54 
55 #include "libc_private.h"
56 
57 /*
58  * To avoid modifying the original (single-threaded) code too much, we'll just
59  * define the old globals as fields inside the table.
60  *
61  * We also modify the collation table test functions to search the thread-local
62  * table first and the global table second.
63  */
/*
 * Each legacy global name below expands to the member of the same name in
 * whichever `table' pointer is in scope where the name is used.
 */
#define	__collate_load_error \
	(table->__collate_load_error)
#define	__collate_substitute_nontrivial \
	(table->__collate_substitute_nontrivial)
#define	__collate_substitute_table_ptr \
	(table->__collate_substitute_table_ptr)
#define	__collate_char_pri_table_ptr \
	(table->__collate_char_pri_table_ptr)
#define	__collate_chain_pri_table \
	(table->__collate_chain_pri_table)
69 
70 
71 struct xlocale_collate __xlocale_global_collate = {
72 	{{0}, "C"}, 1, 0
73 };
74 
75  struct xlocale_collate __xlocale_C_collate = {
76 	{{0}, "C"}, 1, 0
77 };
78 
79 void __collate_err(int ex, const char *f) __dead2;
80 
81 int
82 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
83 
84 static void
85 destruct_collate(void *t)
86 {
87 	struct xlocale_collate *table = t;
88 	if (__collate_chain_pri_table) {
89 		free(__collate_chain_pri_table);
90 	}
91 	free(t);
92 }
93 
94 void *
95 __collate_load(const char *encoding, locale_t unused)
96 {
97 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
98 		return &__xlocale_C_collate;
99 	}
100 	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
101 	table->header.header.destructor = destruct_collate;
102 	// FIXME: Make sure that _LDP_CACHE is never returned.  We should be doing
103 	// the caching outside of this section
104 	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
105 		xlocale_release(table);
106 		return NULL;
107 	}
108 	return table;
109 }
110 
111 /**
112  * Load the collation tables for the specified encoding into the global table.
113  */
114 int
115 __collate_load_tables(const char *encoding)
116 {
117 	return __collate_load_tables_l(encoding, &__xlocale_global_collate);
118 }
119 
120 int
121 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
122 {
123 	FILE *fp;
124 	int i, saverr, chains;
125 	uint32_t u32;
126 	char strbuf[STR_LEN], buf[PATH_MAX];
127 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
128 
129 	/* 'encoding' must be already checked. */
130 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
131 		__collate_load_error = 1;
132 		return (_LDP_CACHE);
133 	}
134 
135 	/* 'PathLocale' must be already set & checked. */
136 	/* Range checking not needed, encoding has fixed size */
137 	(void)strcpy(buf, _PathLocale);
138 	(void)strcat(buf, "/");
139 	(void)strcat(buf, encoding);
140 	(void)strcat(buf, "/LC_COLLATE");
141 	if ((fp = fopen(buf, "r")) == NULL)
142 		return (_LDP_ERROR);
143 
144 	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
145 		saverr = errno;
146 		(void)fclose(fp);
147 		errno = saverr;
148 		return (_LDP_ERROR);
149 	}
150 	chains = -1;
151 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
152 		chains = 0;
153 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
154 		chains = 1;
155 	if (chains < 0) {
156 		(void)fclose(fp);
157 		errno = EFTYPE;
158 		return (_LDP_ERROR);
159 	}
160 	if (chains) {
161 		if (fread(&u32, sizeof(u32), 1, fp) != 1) {
162 			saverr = errno;
163 			(void)fclose(fp);
164 			errno = saverr;
165 			return (_LDP_ERROR);
166 		}
167 		if ((chains = (int)ntohl(u32)) < 1) {
168 			(void)fclose(fp);
169 			errno = EFTYPE;
170 			return (_LDP_ERROR);
171 		}
172 	} else
173 		chains = TABLE_SIZE;
174 
175 	if ((TMP_substitute_table =
176 	     malloc(sizeof(__collate_substitute_table))) == NULL) {
177 		saverr = errno;
178 		(void)fclose(fp);
179 		errno = saverr;
180 		return (_LDP_ERROR);
181 	}
182 	if ((TMP_char_pri_table =
183 	     malloc(sizeof(__collate_char_pri_table))) == NULL) {
184 		saverr = errno;
185 		free(TMP_substitute_table);
186 		(void)fclose(fp);
187 		errno = saverr;
188 		return (_LDP_ERROR);
189 	}
190 	if ((TMP_chain_pri_table =
191 	     malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
192 		saverr = errno;
193 		free(TMP_substitute_table);
194 		free(TMP_char_pri_table);
195 		(void)fclose(fp);
196 		errno = saverr;
197 		return (_LDP_ERROR);
198 	}
199 
200 #define FREAD(a, b, c, d) \
201 { \
202 	if (fread(a, b, c, d) != c) { \
203 		saverr = errno; \
204 		free(TMP_substitute_table); \
205 		free(TMP_char_pri_table); \
206 		free(TMP_chain_pri_table); \
207 		(void)fclose(d); \
208 		errno = saverr; \
209 		return (_LDP_ERROR); \
210 	} \
211 }
212 
213 	FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
214 	FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
215 	FREAD(TMP_chain_pri_table,
216 	      sizeof(*__collate_chain_pri_table), chains, fp);
217 	(void)fclose(fp);
218 
219 	if (__collate_substitute_table_ptr != NULL)
220 		free(__collate_substitute_table_ptr);
221 	__collate_substitute_table_ptr = TMP_substitute_table;
222 	if (__collate_char_pri_table_ptr != NULL)
223 		free(__collate_char_pri_table_ptr);
224 	__collate_char_pri_table_ptr = TMP_char_pri_table;
225 	for (i = 0; i < UCHAR_MAX + 1; i++) {
226 		__collate_char_pri_table[i].prim =
227 		    ntohl(__collate_char_pri_table[i].prim);
228 		__collate_char_pri_table[i].sec =
229 		    ntohl(__collate_char_pri_table[i].sec);
230 	}
231 	if (__collate_chain_pri_table != NULL)
232 		free(__collate_chain_pri_table);
233 	__collate_chain_pri_table = TMP_chain_pri_table;
234 	for (i = 0; i < chains; i++) {
235 		__collate_chain_pri_table[i].prim =
236 		    ntohl(__collate_chain_pri_table[i].prim);
237 		__collate_chain_pri_table[i].sec =
238 		    ntohl(__collate_chain_pri_table[i].sec);
239 	}
240 	__collate_substitute_nontrivial = 0;
241 	for (i = 0; i < UCHAR_MAX + 1; i++) {
242 		if (__collate_substitute_table[i][0] != i ||
243 		    __collate_substitute_table[i][1] != 0) {
244 			__collate_substitute_nontrivial = 1;
245 			break;
246 		}
247 	}
248 	__collate_load_error = 0;
249 
250 	return (_LDP_LOADED);
251 }
252 
253 u_char *
254 __collate_substitute(struct xlocale_collate *table, const u_char *s)
255 {
256 	int dest_len, len, nlen;
257 	int delta = strlen(s);
258 	u_char *dest_str = NULL;
259 
260 	if (s == NULL || *s == '\0')
261 		return (__collate_strdup(""));
262 	delta += delta / 8;
263 	dest_str = malloc(dest_len = delta);
264 	if (dest_str == NULL)
265 		__collate_err(EX_OSERR, __func__);
266 	len = 0;
267 	while (*s) {
268 		nlen = len + strlen(__collate_substitute_table[*s]);
269 		if (dest_len <= nlen) {
270 			dest_str = reallocf(dest_str, dest_len = nlen + delta);
271 			if (dest_str == NULL)
272 				__collate_err(EX_OSERR, __func__);
273 		}
274 		(void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
275 		len = nlen;
276 	}
277 	return (dest_str);
278 }
279 
280 void
281 __collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
282 {
283 	struct __collate_st_chain_pri *p2;
284 
285 	*len = 1;
286 	*prim = *sec = 0;
287 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
288 		if (*t == p2->str[0] &&
289 		    strncmp(t, p2->str, strlen(p2->str)) == 0) {
290 			*len = strlen(p2->str);
291 			*prim = p2->prim;
292 			*sec = p2->sec;
293 			return;
294 		}
295 	}
296 	*prim = __collate_char_pri_table[*t].prim;
297 	*sec = __collate_char_pri_table[*t].sec;
298 }
299 
300 u_char *
301 __collate_strdup(u_char *s)
302 {
303 	u_char *t = strdup(s);
304 
305 	if (t == NULL)
306 		__collate_err(EX_OSERR, __func__);
307 	return (t);
308 }
309 
/*
 * Write "<progname>: <f>: <strerror(errno)>\n" to standard error using raw
 * _write() calls, then exit with status `ex'.
 *
 * ex: exit status.
 * f:  text for the second field; callers in this file pass __func__.
 */
void
__collate_err(int ex, const char *f)
{
	/* Capture errno first; the calls below may change it. */
	const int saved_errno = errno;
	static const char separator[] = ": ";
	const char *progname, *reason;

	progname = _getprogname();
	_write(STDERR_FILENO, progname, strlen(progname));
	_write(STDERR_FILENO, separator, sizeof(separator) - 1);
	_write(STDERR_FILENO, f, strlen(f));
	_write(STDERR_FILENO, separator, sizeof(separator) - 1);
	reason = strerror(saved_errno);
	_write(STDERR_FILENO, reason, strlen(reason));
	_write(STDERR_FILENO, "\n", 1);
	exit(ex);
}
327 
#ifdef COLLATE_DEBUG
/*
 * Debugging aid: print the substitute, chain-priority and char-priority
 * tables of the global collation table (__xlocale_global_collate) to stdout.
 * Prints nothing if any of the three tables has not been loaded yet.
 */
void
__collate_print_tables(void)
{
	/*
	 * __collate_chain_pri_table and the *_ptr names are macros that expand
	 * to fields of a local called `table' (the other table names presumably
	 * do the same via collate.h).  Without this declaration the function
	 * cannot compile when COLLATE_DEBUG is defined.
	 */
	struct xlocale_collate *table = &__xlocale_global_collate;
	struct __collate_st_chain_pri *p2;
	int i;

	/* The table pointers stay NULL until a successful load. */
	if (__collate_substitute_table_ptr == NULL ||
	    __collate_char_pri_table_ptr == NULL ||
	    __collate_chain_pri_table == NULL)
		return;

	printf("Substitute table:\n");
	/* Only entries that do not start with the byte itself are shown. */
	for (i = 0; i < UCHAR_MAX + 1; i++) {
		if (i != *__collate_substitute_table[i])
			printf("\t'%c' --> \"%s\"\n", i,
			    __collate_substitute_table[i]);
	}

	printf("Chain priority table:\n");
	/* The chain table ends at the first entry with an empty string. */
	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
		printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);

	printf("Char priority table:\n");
	for (i = 0; i < UCHAR_MAX + 1; i++)
		printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
		    __collate_char_pri_table[i].sec);
}
#endif
349