xref: /freebsd/lib/libc/locale/collate.c (revision 57718be8fa0bd5edc11ab9a72e68cc71982939a6)
1 /*-
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include "namespace.h"
37 #include <arpa/inet.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <sysexits.h>
44 #include "un-namespace.h"
45 
46 #include "collate.h"
47 #include "setlocale.h"
48 #include "ldpart.h"
49 
50 #include "libc_private.h"
51 
52 /*
53  * To avoid modifying the original (single-threaded) code too much, we'll just
54  * define the old globals as fields inside the table.
55  *
56  * We also modify the collation table test functions to search the thread-local
57  * table first and the global table second.
58  */
/*
 * Each legacy name below expands to the same-named field of `table`, so any
 * function that uses one of them must have a `struct xlocale_collate *table`
 * variable in scope.
 */
#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
#define __collate_chain_pri_table (table->__collate_chain_pri_table)
/*
 * Copy of __xlocale_global_collate.__collate_load_error; refreshed by
 * __collate_load_tables() after every load attempt on the global table.
 */
int __collate_load_error;
64 
65 
/*
 * Collation state that __collate_load_tables() loads into.
 * NOTE(review): the positional initializer presumably fills the component
 * header (locale name "C"), then __collate_load_error = 1 and
 * __collate_substitute_nontrivial = 0 -- confirm against collate.h.
 */
struct xlocale_collate __xlocale_global_collate = {
	{{0}, "C"}, 1, 0
};

/*
 * Shared instance returned by __collate_load() for the "C" and "POSIX"
 * encodings; it is initialized the same way as the global table.
 */
 struct xlocale_collate __xlocale_C_collate = {
	{{0}, "C"}, 1, 0
};
73 
/*
 * Writes a diagnostic to standard error and exits with status `ex`;
 * declared __dead2 because it never returns.
 */
void __collate_err(int ex, const char *f) __dead2;

/* Forward declaration: __collate_load() calls this ahead of its definition. */
int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
78 
79 static void
80 destruct_collate(void *t)
81 {
82 	struct xlocale_collate *table = t;
83 	if (__collate_chain_pri_table) {
84 		free(__collate_chain_pri_table);
85 	}
86 	free(t);
87 }
88 
89 void *
90 __collate_load(const char *encoding, locale_t unused)
91 {
92 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
93 		return &__xlocale_C_collate;
94 	}
95 	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
96 	table->header.header.destructor = destruct_collate;
97 	// FIXME: Make sure that _LDP_CACHE is never returned.  We should be doing
98 	// the caching outside of this section
99 	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
100 		xlocale_release(table);
101 		return NULL;
102 	}
103 	return table;
104 }
105 
106 /**
107  * Load the collation tables for the specified encoding into the global table.
108  */
109 int
110 __collate_load_tables(const char *encoding)
111 {
112 	int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate);
113 	__collate_load_error = __xlocale_global_collate.__collate_load_error;
114 	return ret;
115 }
116 
117 int
118 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
119 {
120 	FILE *fp;
121 	int i, saverr, chains;
122 	uint32_t u32;
123 	char strbuf[STR_LEN], buf[PATH_MAX];
124 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
125 
126 	/* 'encoding' must be already checked. */
127 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
128 		table->__collate_load_error = 1;
129 		return (_LDP_CACHE);
130 	}
131 
132 	/* 'PathLocale' must be already set & checked. */
133 	/* Range checking not needed, encoding has fixed size */
134 	(void)strcpy(buf, _PathLocale);
135 	(void)strcat(buf, "/");
136 	(void)strcat(buf, encoding);
137 	(void)strcat(buf, "/LC_COLLATE");
138 	if ((fp = fopen(buf, "re")) == NULL)
139 		return (_LDP_ERROR);
140 
141 	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
142 		saverr = errno;
143 		(void)fclose(fp);
144 		errno = saverr;
145 		return (_LDP_ERROR);
146 	}
147 	chains = -1;
148 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
149 		chains = 0;
150 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
151 		chains = 1;
152 	if (chains < 0) {
153 		(void)fclose(fp);
154 		errno = EFTYPE;
155 		return (_LDP_ERROR);
156 	}
157 	if (chains) {
158 		if (fread(&u32, sizeof(u32), 1, fp) != 1) {
159 			saverr = errno;
160 			(void)fclose(fp);
161 			errno = saverr;
162 			return (_LDP_ERROR);
163 		}
164 		if ((chains = (int)ntohl(u32)) < 1) {
165 			(void)fclose(fp);
166 			errno = EFTYPE;
167 			return (_LDP_ERROR);
168 		}
169 	} else
170 		chains = TABLE_SIZE;
171 
172 	if ((TMP_substitute_table =
173 	     malloc(sizeof(__collate_substitute_table))) == NULL) {
174 		saverr = errno;
175 		(void)fclose(fp);
176 		errno = saverr;
177 		return (_LDP_ERROR);
178 	}
179 	if ((TMP_char_pri_table =
180 	     malloc(sizeof(__collate_char_pri_table))) == NULL) {
181 		saverr = errno;
182 		free(TMP_substitute_table);
183 		(void)fclose(fp);
184 		errno = saverr;
185 		return (_LDP_ERROR);
186 	}
187 	if ((TMP_chain_pri_table =
188 	     malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
189 		saverr = errno;
190 		free(TMP_substitute_table);
191 		free(TMP_char_pri_table);
192 		(void)fclose(fp);
193 		errno = saverr;
194 		return (_LDP_ERROR);
195 	}
196 
197 #define FREAD(a, b, c, d) \
198 { \
199 	if (fread(a, b, c, d) != c) { \
200 		saverr = errno; \
201 		free(TMP_substitute_table); \
202 		free(TMP_char_pri_table); \
203 		free(TMP_chain_pri_table); \
204 		(void)fclose(d); \
205 		errno = saverr; \
206 		return (_LDP_ERROR); \
207 	} \
208 }
209 
210 	FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
211 	FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
212 	FREAD(TMP_chain_pri_table,
213 	      sizeof(*__collate_chain_pri_table), chains, fp);
214 	(void)fclose(fp);
215 
216 	if (__collate_substitute_table_ptr != NULL)
217 		free(__collate_substitute_table_ptr);
218 	__collate_substitute_table_ptr = TMP_substitute_table;
219 	if (__collate_char_pri_table_ptr != NULL)
220 		free(__collate_char_pri_table_ptr);
221 	__collate_char_pri_table_ptr = TMP_char_pri_table;
222 	for (i = 0; i < UCHAR_MAX + 1; i++) {
223 		__collate_char_pri_table[i].prim =
224 		    ntohl(__collate_char_pri_table[i].prim);
225 		__collate_char_pri_table[i].sec =
226 		    ntohl(__collate_char_pri_table[i].sec);
227 	}
228 	if (__collate_chain_pri_table != NULL)
229 		free(__collate_chain_pri_table);
230 	__collate_chain_pri_table = TMP_chain_pri_table;
231 	for (i = 0; i < chains; i++) {
232 		__collate_chain_pri_table[i].prim =
233 		    ntohl(__collate_chain_pri_table[i].prim);
234 		__collate_chain_pri_table[i].sec =
235 		    ntohl(__collate_chain_pri_table[i].sec);
236 	}
237 	__collate_substitute_nontrivial = 0;
238 	for (i = 0; i < UCHAR_MAX + 1; i++) {
239 		if (__collate_substitute_table[i][0] != i ||
240 		    __collate_substitute_table[i][1] != 0) {
241 			__collate_substitute_nontrivial = 1;
242 			break;
243 		}
244 	}
245 	table->__collate_load_error = 0;
246 
247 	return (_LDP_LOADED);
248 }
249 
250 u_char *
251 __collate_substitute(struct xlocale_collate *table, const u_char *s)
252 {
253 	int dest_len, len, nlen;
254 	int delta = strlen(s);
255 	u_char *dest_str = NULL;
256 
257 	if (s == NULL || *s == '\0')
258 		return (__collate_strdup(""));
259 	delta += delta / 8;
260 	dest_str = malloc(dest_len = delta);
261 	if (dest_str == NULL)
262 		__collate_err(EX_OSERR, __func__);
263 	len = 0;
264 	while (*s) {
265 		nlen = len + strlen(__collate_substitute_table[*s]);
266 		if (dest_len <= nlen) {
267 			dest_str = reallocf(dest_str, dest_len = nlen + delta);
268 			if (dest_str == NULL)
269 				__collate_err(EX_OSERR, __func__);
270 		}
271 		(void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
272 		len = nlen;
273 	}
274 	return (dest_str);
275 }
276 
277 void
278 __collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
279 {
280 	struct __collate_st_chain_pri *p2;
281 
282 	*len = 1;
283 	*prim = *sec = 0;
284 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
285 		if (*t == p2->str[0] &&
286 		    strncmp(t, p2->str, strlen(p2->str)) == 0) {
287 			*len = strlen(p2->str);
288 			*prim = p2->prim;
289 			*sec = p2->sec;
290 			return;
291 		}
292 	}
293 	*prim = __collate_char_pri_table[*t].prim;
294 	*sec = __collate_char_pri_table[*t].sec;
295 }
296 
297 u_char *
298 __collate_strdup(u_char *s)
299 {
300 	u_char *t = strdup(s);
301 
302 	if (t == NULL)
303 		__collate_err(EX_OSERR, __func__);
304 	return (t);
305 }
306 
/* Write the NUL-terminated string `s` to standard error using _write(). */
static void
collate_err_puts(const char *s)
{
	_write(STDERR_FILENO, s, strlen(s));
}

/*
 * Print "<progname>: <f>: <strerror(errno)>" followed by a newline to
 * standard error, then exit with status `ex`.  errno is captured on entry,
 * before any other call can change it.
 */
void
__collate_err(int ex, const char *f)
{
	int saved_errno = errno;

	collate_err_puts(_getprogname());
	collate_err_puts(": ");
	collate_err_puts(f);
	collate_err_puts(": ");
	collate_err_puts(strerror(saved_errno));
	collate_err_puts("\n");
	exit(ex);
}
324 
#ifdef COLLATE_DEBUG
/*
 * Debugging aid: print the substitute, chain-priority and character-priority
 * tables of the global collation table to standard output.  Nothing is
 * printed while the global table reports a load error, because in that state
 * the table pointers may not refer to loaded data.
 */
void
__collate_print_tables(void)
{
	/* The table-access macros expand to fields of `table`. */
	struct xlocale_collate *table = &__xlocale_global_collate;
	int i;
	struct __collate_st_chain_pri *p2;

	if (table->__collate_load_error)
		return;

	printf("Substitute table:\n");
	for (i = 0; i < UCHAR_MAX + 1; i++) {
		if (i != *__collate_substitute_table[i]) {
			printf("\t'%c' --> \"%s\"\n", i,
			       __collate_substitute_table[i]);
		}
	}
	printf("Chain priority table:\n");
	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
		printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
	printf("Char priority table:\n");
	for (i = 0; i < UCHAR_MAX + 1; i++) {
		printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
		       __collate_char_pri_table[i].sec);
	}
}
#endif
346