xref: /freebsd/lib/libc/locale/collate.c (revision d201fe46e355212750b727061e6a7ac005267852)
/*-
 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
 *		at Electronni Visti IA, Kiev, Ukraine.
 *			All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
29c3d0cca4SAndrey A. Chernov 
30d201fe46SDaniel Eischen #include "namespace.h"
31c3d0cca4SAndrey A. Chernov #include <rune.h>
32c3d0cca4SAndrey A. Chernov #include <stdio.h>
33c3d0cca4SAndrey A. Chernov #include <stdlib.h>
34c3d0cca4SAndrey A. Chernov #include <string.h>
35926f20c9SAndrey A. Chernov #include <errno.h>
36926f20c9SAndrey A. Chernov #include <unistd.h>
37c3d0cca4SAndrey A. Chernov #include <sysexits.h>
38d201fe46SDaniel Eischen #include "un-namespace.h"
39d201fe46SDaniel Eischen 
40c3d0cca4SAndrey A. Chernov #include "collate.h"
4163407d34SAndrey A. Chernov #include "setlocale.h"
42c3d0cca4SAndrey A. Chernov 
43c3d0cca4SAndrey A. Chernov int __collate_load_error = 1;
44e755fb76SDmitrij Tejblum int __collate_substitute_nontrivial;
451642f84dSAndrey A. Chernov char __collate_version[STR_LEN];
46c3d0cca4SAndrey A. Chernov u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
47c3d0cca4SAndrey A. Chernov struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
48c3d0cca4SAndrey A. Chernov struct __collate_st_chain_pri __collate_chain_pri_table[TABLE_SIZE];
49c3d0cca4SAndrey A. Chernov 
50c3d0cca4SAndrey A. Chernov #define FREAD(a, b, c, d) \
51c3d0cca4SAndrey A. Chernov 	do { \
52c3d0cca4SAndrey A. Chernov 		if (fread(a, b, c, d) != c) { \
53c3d0cca4SAndrey A. Chernov 			fclose(d); \
54c3d0cca4SAndrey A. Chernov 			return -1; \
55c3d0cca4SAndrey A. Chernov 		} \
56c3d0cca4SAndrey A. Chernov 	} while(0)
57c3d0cca4SAndrey A. Chernov 
58eaa86f9dSBruce Evans void __collate_err(int ex, const char *f) __dead2;
59926f20c9SAndrey A. Chernov 
60c3d0cca4SAndrey A. Chernov int
61c3d0cca4SAndrey A. Chernov __collate_load_tables(encoding)
62c3d0cca4SAndrey A. Chernov 	char *encoding;
63c3d0cca4SAndrey A. Chernov {
64c3d0cca4SAndrey A. Chernov 	char buf[PATH_MAX];
65c3d0cca4SAndrey A. Chernov 	FILE *fp;
66e755fb76SDmitrij Tejblum 	int i, save_load_error;
67c3d0cca4SAndrey A. Chernov 
68377da8e8SAndrey A. Chernov 	save_load_error = __collate_load_error;
69c3d0cca4SAndrey A. Chernov 	__collate_load_error = 1;
70377da8e8SAndrey A. Chernov 	if (!encoding) {
71377da8e8SAndrey A. Chernov 		__collate_load_error = save_load_error;
72c3d0cca4SAndrey A. Chernov 		return -1;
73377da8e8SAndrey A. Chernov 	}
74c8f931a8SAndrey A. Chernov 	if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX"))
75377da8e8SAndrey A. Chernov 		return 0;
76c8f931a8SAndrey A. Chernov 	if (!_PathLocale) {
77c8f931a8SAndrey A. Chernov 		__collate_load_error = save_load_error;
78c8f931a8SAndrey A. Chernov 		return -1;
79c8f931a8SAndrey A. Chernov 	}
80d81a0916SAndrey A. Chernov 	/* Range checking not needed, encoding has fixed size */
81af155bdfSAndrey A. Chernov 	(void) strcpy(buf, _PathLocale);
82af155bdfSAndrey A. Chernov 	(void) strcat(buf, "/");
83af155bdfSAndrey A. Chernov 	(void) strcat(buf, encoding);
84af155bdfSAndrey A. Chernov 	(void) strcat(buf, "/LC_COLLATE");
85377da8e8SAndrey A. Chernov 	if ((fp = fopen(buf, "r")) == NULL) {
86377da8e8SAndrey A. Chernov 		__collate_load_error = save_load_error;
87c3d0cca4SAndrey A. Chernov 		return -1;
88377da8e8SAndrey A. Chernov 	}
891642f84dSAndrey A. Chernov 	FREAD(__collate_version, sizeof(__collate_version), 1, fp);
901642f84dSAndrey A. Chernov 	if (strcmp(__collate_version, COLLATE_VERSION) != 0) {
911642f84dSAndrey A. Chernov 		fclose(fp);
921642f84dSAndrey A. Chernov 		return -1;
931642f84dSAndrey A. Chernov 	}
94c3d0cca4SAndrey A. Chernov 	FREAD(__collate_substitute_table, sizeof(__collate_substitute_table),
95c3d0cca4SAndrey A. Chernov 	      1, fp);
96c3d0cca4SAndrey A. Chernov 	FREAD(__collate_char_pri_table, sizeof(__collate_char_pri_table), 1,
97c3d0cca4SAndrey A. Chernov 	      fp);
98c3d0cca4SAndrey A. Chernov 	FREAD(__collate_chain_pri_table, sizeof(__collate_chain_pri_table), 1,
99c3d0cca4SAndrey A. Chernov 	      fp);
100c3d0cca4SAndrey A. Chernov 	fclose(fp);
101c3d0cca4SAndrey A. Chernov 	__collate_load_error = 0;
102e755fb76SDmitrij Tejblum 
103e755fb76SDmitrij Tejblum 	__collate_substitute_nontrivial = 0;
104e755fb76SDmitrij Tejblum 	for (i = 0; i < UCHAR_MAX + 1; i++) {
105e755fb76SDmitrij Tejblum 		if (__collate_substitute_table[i][0] != i ||
106e755fb76SDmitrij Tejblum 		    __collate_substitute_table[i][1] != 0) {
107e755fb76SDmitrij Tejblum 			__collate_substitute_nontrivial = 1;
108e755fb76SDmitrij Tejblum 			break;
109e755fb76SDmitrij Tejblum 		}
110e755fb76SDmitrij Tejblum 	}
111e755fb76SDmitrij Tejblum 
112c3d0cca4SAndrey A. Chernov 	return 0;
113c3d0cca4SAndrey A. Chernov }
114c3d0cca4SAndrey A. Chernov 
115c3d0cca4SAndrey A. Chernov u_char *
116c3d0cca4SAndrey A. Chernov __collate_substitute(s)
117c3d0cca4SAndrey A. Chernov 	const u_char *s;
118c3d0cca4SAndrey A. Chernov {
11903a7efc2SDmitrij Tejblum 	int dest_len, len, nlen;
120c3d0cca4SAndrey A. Chernov 	int delta = strlen(s);
121926f20c9SAndrey A. Chernov 	u_char *dest_str = NULL;
122c3d0cca4SAndrey A. Chernov 
123926f20c9SAndrey A. Chernov 	if(s == NULL || *s == '\0')
124c3d0cca4SAndrey A. Chernov 		return __collate_strdup("");
12503a7efc2SDmitrij Tejblum 	delta += delta / 8;
12603a7efc2SDmitrij Tejblum 	dest_str = malloc(dest_len = delta);
12703a7efc2SDmitrij Tejblum 	if(dest_str == NULL)
12803a7efc2SDmitrij Tejblum 		__collate_err(EX_OSERR, __FUNCTION__);
12903a7efc2SDmitrij Tejblum 	len = 0;
130c3d0cca4SAndrey A. Chernov 	while(*s) {
13103a7efc2SDmitrij Tejblum 		nlen = len + strlen(__collate_substitute_table[*s]);
13203a7efc2SDmitrij Tejblum 		if (dest_len <= nlen) {
13303a7efc2SDmitrij Tejblum 			dest_str = reallocf(dest_str, dest_len = nlen + delta);
134926f20c9SAndrey A. Chernov 			if(dest_str == NULL)
135926f20c9SAndrey A. Chernov 				__collate_err(EX_OSERR, __FUNCTION__);
136c3d0cca4SAndrey A. Chernov 		}
13703a7efc2SDmitrij Tejblum 		strcpy(dest_str + len, __collate_substitute_table[*s++]);
13803a7efc2SDmitrij Tejblum 		len = nlen;
139c3d0cca4SAndrey A. Chernov 	}
140c3d0cca4SAndrey A. Chernov 	return dest_str;
141c3d0cca4SAndrey A. Chernov }
142c3d0cca4SAndrey A. Chernov 
143c3d0cca4SAndrey A. Chernov void
144c3d0cca4SAndrey A. Chernov __collate_lookup(t, len, prim, sec)
145e755fb76SDmitrij Tejblum 	const u_char *t;
146c3d0cca4SAndrey A. Chernov 	int *len, *prim, *sec;
147c3d0cca4SAndrey A. Chernov {
148c3d0cca4SAndrey A. Chernov 	struct __collate_st_chain_pri *p2;
149c3d0cca4SAndrey A. Chernov 
150c3d0cca4SAndrey A. Chernov 	*len = 1;
151c3d0cca4SAndrey A. Chernov 	*prim = *sec = 0;
152c3d0cca4SAndrey A. Chernov 	for(p2 = __collate_chain_pri_table; p2->str[0]; p2++) {
153c3d0cca4SAndrey A. Chernov 		if(strncmp(t, p2->str, strlen(p2->str)) == 0) {
154c3d0cca4SAndrey A. Chernov 			*len = strlen(p2->str);
155c3d0cca4SAndrey A. Chernov 			*prim = p2->prim;
156c3d0cca4SAndrey A. Chernov 			*sec = p2->sec;
157c3d0cca4SAndrey A. Chernov 			return;
158c3d0cca4SAndrey A. Chernov 		}
159c3d0cca4SAndrey A. Chernov 	}
160c3d0cca4SAndrey A. Chernov 	*prim = __collate_char_pri_table[*t].prim;
161c3d0cca4SAndrey A. Chernov 	*sec = __collate_char_pri_table[*t].sec;
162c3d0cca4SAndrey A. Chernov }
163c3d0cca4SAndrey A. Chernov 
164c3d0cca4SAndrey A. Chernov u_char *
165c3d0cca4SAndrey A. Chernov __collate_strdup(s)
166c3d0cca4SAndrey A. Chernov 	u_char *s;
167c3d0cca4SAndrey A. Chernov {
168926f20c9SAndrey A. Chernov 	u_char *t = strdup(s);
169c3d0cca4SAndrey A. Chernov 
170926f20c9SAndrey A. Chernov 	if (t == NULL)
171926f20c9SAndrey A. Chernov 		__collate_err(EX_OSERR, __FUNCTION__);
172c3d0cca4SAndrey A. Chernov 	return t;
173c3d0cca4SAndrey A. Chernov }
174c3d0cca4SAndrey A. Chernov 
/*
 * Report a fatal error and terminate the process.
 *
 * Writes "<progname>: <f>: <strerror(errno)>\n" to standard error using
 * _write() directly, then calls exit(ex).  errno is captured on entry so
 * that the message reflects the error that led to this call.
 *
 *	ex	exit status passed to exit()
 *	f	text identifying the failing function
 */
void
__collate_err(int ex, const char *f)
{
	extern char *__progname;                /* Program name, from crt0. */
	int saved_errno = errno;
	const char *reason;

	_write(STDERR_FILENO, __progname, strlen(__progname));
	_write(STDERR_FILENO, ": ", 2);
	_write(STDERR_FILENO, f, strlen(f));
	_write(STDERR_FILENO, ": ", 2);
	reason = strerror(saved_errno);
	_write(STDERR_FILENO, reason, strlen(reason));
	_write(STDERR_FILENO, "\n", 1);
	exit(ex);
}
193926f20c9SAndrey A. Chernov 
194c3d0cca4SAndrey A. Chernov #ifdef COLLATE_DEBUG
195c3d0cca4SAndrey A. Chernov void
196c3d0cca4SAndrey A. Chernov __collate_print_tables()
197c3d0cca4SAndrey A. Chernov {
198c3d0cca4SAndrey A. Chernov 	int i;
199c3d0cca4SAndrey A. Chernov 	struct __collate_st_chain_pri *p2;
200c3d0cca4SAndrey A. Chernov 
201c3d0cca4SAndrey A. Chernov 	printf("Substitute table:\n");
202c3d0cca4SAndrey A. Chernov 	for (i = 0; i < UCHAR_MAX + 1; i++)
203967a5cb1SAndrey A. Chernov 	    if (i != *__collate_substitute_table[i])
204c3d0cca4SAndrey A. Chernov 		printf("\t'%c' --> \"%s\"\n", i,
205c3d0cca4SAndrey A. Chernov 		       __collate_substitute_table[i]);
206c3d0cca4SAndrey A. Chernov 	printf("Chain priority table:\n");
207c3d0cca4SAndrey A. Chernov 	for (p2 = __collate_chain_pri_table; p2->str[0]; p2++)
208c3d0cca4SAndrey A. Chernov 		printf("\t\"%s\" : %d %d\n\n", p2->str, p2->prim, p2->sec);
209c3d0cca4SAndrey A. Chernov 	printf("Char priority table:\n");
210c3d0cca4SAndrey A. Chernov 	for (i = 0; i < UCHAR_MAX + 1; i++)
211c3d0cca4SAndrey A. Chernov 		printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
212c3d0cca4SAndrey A. Chernov 		       __collate_char_pri_table[i].sec);
213c3d0cca4SAndrey A. Chernov }
214c3d0cca4SAndrey A. Chernov #endif
215