xref: /freebsd/lib/libc/locale/collate.c (revision 6486b015fc84e96725fef22b0e3363351399ae83)
1 /*-
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include "namespace.h"
37 #include <arpa/inet.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <sysexits.h>
44 #include "un-namespace.h"
45 
46 #include "collate.h"
47 #include "setlocale.h"
48 #include "ldpart.h"
49 
50 #include "libc_private.h"
51 
52 /*
53  * To avoid modifying the original (single-threaded) code too much, we'll just
54  * define the old globals as fields inside the table.
55  *
56  * We also modify the collation table test functions to search the thread-local
57  * table first and the global table second.
58  */
/*
 * Each legacy global name below expands to the same-named field of a local
 * variable called `table`.  These names therefore only compile inside a
 * function that has a `struct xlocale_collate *table` in scope.
 */
59 #define __collate_load_error (table->__collate_load_error)
60 #define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
61 #define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
62 #define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
63 #define __collate_chain_pri_table (table->__collate_chain_pri_table)
64 
65 
/*
 * Collation state for the global locale; __collate_load_tables() loads
 * into this table.
 * NOTE(review): the positional initializer presumably sets
 * __collate_load_error to 1 (nothing loaded yet) -- confirm the field
 * order against collate.h.
 */
66 struct xlocale_collate __xlocale_global_collate = {
67 	{{0}, "C"}, 1, 0
68 };
69 
/*
 * Shared table that __collate_load() returns for the "C" and "POSIX"
 * encodings instead of allocating a new one.
 */
70  struct xlocale_collate __xlocale_C_collate = {
71 	{{0}, "C"}, 1, 0
72 };
73 
/*
 * Forward declarations for functions that are called in this file before
 * their definitions appear.
 */
/* Writes a diagnostic to stderr and exits with status 'ex'. */
74 void __collate_err(int ex, const char *f) __dead2;
75 
/* Worker that loads the collation data for 'encoding' into 'table'. */
76 int
77 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
78 
79 static void
80 destruct_collate(void *t)
81 {
82 	struct xlocale_collate *table = t;
83 	if (__collate_chain_pri_table) {
84 		free(__collate_chain_pri_table);
85 	}
86 	free(t);
87 }
88 
89 void *
90 __collate_load(const char *encoding, locale_t unused)
91 {
92 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
93 		return &__xlocale_C_collate;
94 	}
95 	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
96 	table->header.header.destructor = destruct_collate;
97 	// FIXME: Make sure that _LDP_CACHE is never returned.  We should be doing
98 	// the caching outside of this section
99 	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
100 		xlocale_release(table);
101 		return NULL;
102 	}
103 	return table;
104 }
105 
106 /**
107  * Load the collation tables for the specified encoding into the global table.
108  */
109 int
110 __collate_load_tables(const char *encoding)
111 {
112 	return __collate_load_tables_l(encoding, &__xlocale_global_collate);
113 }
114 
115 int
116 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
117 {
118 	FILE *fp;
119 	int i, saverr, chains;
120 	uint32_t u32;
121 	char strbuf[STR_LEN], buf[PATH_MAX];
122 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
123 
124 	/* 'encoding' must be already checked. */
125 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
126 		__collate_load_error = 1;
127 		return (_LDP_CACHE);
128 	}
129 
130 	/* 'PathLocale' must be already set & checked. */
131 	/* Range checking not needed, encoding has fixed size */
132 	(void)strcpy(buf, _PathLocale);
133 	(void)strcat(buf, "/");
134 	(void)strcat(buf, encoding);
135 	(void)strcat(buf, "/LC_COLLATE");
136 	if ((fp = fopen(buf, "r")) == NULL)
137 		return (_LDP_ERROR);
138 
139 	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
140 		saverr = errno;
141 		(void)fclose(fp);
142 		errno = saverr;
143 		return (_LDP_ERROR);
144 	}
145 	chains = -1;
146 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
147 		chains = 0;
148 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
149 		chains = 1;
150 	if (chains < 0) {
151 		(void)fclose(fp);
152 		errno = EFTYPE;
153 		return (_LDP_ERROR);
154 	}
155 	if (chains) {
156 		if (fread(&u32, sizeof(u32), 1, fp) != 1) {
157 			saverr = errno;
158 			(void)fclose(fp);
159 			errno = saverr;
160 			return (_LDP_ERROR);
161 		}
162 		if ((chains = (int)ntohl(u32)) < 1) {
163 			(void)fclose(fp);
164 			errno = EFTYPE;
165 			return (_LDP_ERROR);
166 		}
167 	} else
168 		chains = TABLE_SIZE;
169 
170 	if ((TMP_substitute_table =
171 	     malloc(sizeof(__collate_substitute_table))) == NULL) {
172 		saverr = errno;
173 		(void)fclose(fp);
174 		errno = saverr;
175 		return (_LDP_ERROR);
176 	}
177 	if ((TMP_char_pri_table =
178 	     malloc(sizeof(__collate_char_pri_table))) == NULL) {
179 		saverr = errno;
180 		free(TMP_substitute_table);
181 		(void)fclose(fp);
182 		errno = saverr;
183 		return (_LDP_ERROR);
184 	}
185 	if ((TMP_chain_pri_table =
186 	     malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
187 		saverr = errno;
188 		free(TMP_substitute_table);
189 		free(TMP_char_pri_table);
190 		(void)fclose(fp);
191 		errno = saverr;
192 		return (_LDP_ERROR);
193 	}
194 
195 #define FREAD(a, b, c, d) \
196 { \
197 	if (fread(a, b, c, d) != c) { \
198 		saverr = errno; \
199 		free(TMP_substitute_table); \
200 		free(TMP_char_pri_table); \
201 		free(TMP_chain_pri_table); \
202 		(void)fclose(d); \
203 		errno = saverr; \
204 		return (_LDP_ERROR); \
205 	} \
206 }
207 
208 	FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
209 	FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
210 	FREAD(TMP_chain_pri_table,
211 	      sizeof(*__collate_chain_pri_table), chains, fp);
212 	(void)fclose(fp);
213 
214 	if (__collate_substitute_table_ptr != NULL)
215 		free(__collate_substitute_table_ptr);
216 	__collate_substitute_table_ptr = TMP_substitute_table;
217 	if (__collate_char_pri_table_ptr != NULL)
218 		free(__collate_char_pri_table_ptr);
219 	__collate_char_pri_table_ptr = TMP_char_pri_table;
220 	for (i = 0; i < UCHAR_MAX + 1; i++) {
221 		__collate_char_pri_table[i].prim =
222 		    ntohl(__collate_char_pri_table[i].prim);
223 		__collate_char_pri_table[i].sec =
224 		    ntohl(__collate_char_pri_table[i].sec);
225 	}
226 	if (__collate_chain_pri_table != NULL)
227 		free(__collate_chain_pri_table);
228 	__collate_chain_pri_table = TMP_chain_pri_table;
229 	for (i = 0; i < chains; i++) {
230 		__collate_chain_pri_table[i].prim =
231 		    ntohl(__collate_chain_pri_table[i].prim);
232 		__collate_chain_pri_table[i].sec =
233 		    ntohl(__collate_chain_pri_table[i].sec);
234 	}
235 	__collate_substitute_nontrivial = 0;
236 	for (i = 0; i < UCHAR_MAX + 1; i++) {
237 		if (__collate_substitute_table[i][0] != i ||
238 		    __collate_substitute_table[i][1] != 0) {
239 			__collate_substitute_nontrivial = 1;
240 			break;
241 		}
242 	}
243 	__collate_load_error = 0;
244 
245 	return (_LDP_LOADED);
246 }
247 
248 u_char *
249 __collate_substitute(struct xlocale_collate *table, const u_char *s)
250 {
251 	int dest_len, len, nlen;
252 	int delta = strlen(s);
253 	u_char *dest_str = NULL;
254 
255 	if (s == NULL || *s == '\0')
256 		return (__collate_strdup(""));
257 	delta += delta / 8;
258 	dest_str = malloc(dest_len = delta);
259 	if (dest_str == NULL)
260 		__collate_err(EX_OSERR, __func__);
261 	len = 0;
262 	while (*s) {
263 		nlen = len + strlen(__collate_substitute_table[*s]);
264 		if (dest_len <= nlen) {
265 			dest_str = reallocf(dest_str, dest_len = nlen + delta);
266 			if (dest_str == NULL)
267 				__collate_err(EX_OSERR, __func__);
268 		}
269 		(void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
270 		len = nlen;
271 	}
272 	return (dest_str);
273 }
274 
275 void
276 __collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
277 {
278 	struct __collate_st_chain_pri *p2;
279 
280 	*len = 1;
281 	*prim = *sec = 0;
282 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
283 		if (*t == p2->str[0] &&
284 		    strncmp(t, p2->str, strlen(p2->str)) == 0) {
285 			*len = strlen(p2->str);
286 			*prim = p2->prim;
287 			*sec = p2->sec;
288 			return;
289 		}
290 	}
291 	*prim = __collate_char_pri_table[*t].prim;
292 	*sec = __collate_char_pri_table[*t].sec;
293 }
294 
295 u_char *
296 __collate_strdup(u_char *s)
297 {
298 	u_char *t = strdup(s);
299 
300 	if (t == NULL)
301 		__collate_err(EX_OSERR, __func__);
302 	return (t);
303 }
304 
/* Write one NUL-terminated string to standard error, ignoring failures. */
static void
collate_err_put(const char *text)
{
	_write(STDERR_FILENO, text, strlen(text));
}

/*
 * Print "<progname>: <f>: <strerror(errno)>" followed by a newline on
 * standard error, then terminate the process with exit status 'ex'.
 * errno is captured on entry, before any other call can change it.
 */
void
__collate_err(int ex, const char *f)
{
	int serrno = errno;

	collate_err_put(_getprogname());
	collate_err_put(": ");
	collate_err_put(f);
	collate_err_put(": ");
	collate_err_put(strerror(serrno));
	collate_err_put("\n");
	exit(ex);
}
322 
#ifdef COLLATE_DEBUG
/*
 * Debugging aid: dump the collation tables of the global locale
 * (__xlocale_global_collate) to standard output.  Prints nothing when no
 * tables are loaded.
 */
void
__collate_print_tables(void)
{
	/*
	 * The table names used below are macros that expand to fields of a
	 * local named 'table'; this function takes no arguments, so it binds
	 * that name to the global table.
	 */
	struct xlocale_collate *table = &__xlocale_global_collate;
	int i;
	struct __collate_st_chain_pri *p2;

	/* Nothing to show (and nothing safe to dereference) until a load. */
	if (__collate_load_error ||
	    __collate_substitute_table_ptr == NULL ||
	    __collate_char_pri_table_ptr == NULL ||
	    __collate_chain_pri_table == NULL)
		return;

	printf("Substitute table:\n");
	for (i = 0; i < UCHAR_MAX + 1; i++)
	    if (i != *__collate_substitute_table[i])
		printf("\t'%c' --> \"%s\"\n", i,
		       __collate_substitute_table[i]);
	printf("Chain priority table:\n");
	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
		printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
	printf("Char priority table:\n");
	for (i = 0; i < UCHAR_MAX + 1; i++)
		printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
		       __collate_char_pri_table[i].sec);
}
#endif
344