xref: /illumos-gate/usr/src/lib/libc/port/locale/collate.c (revision 8d0c3d29bb99f6521f2dc5058a7e4debebad7899)
1 /*
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
30  * Use is subject to license terms.
31  */
32 
33 #include "lint.h"
34 #include "file64.h"
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <unistd.h>
40 #include <sysexits.h>
41 #include <netinet/in.h>
42 
43 #include "collate.h"
44 #include "setlocale.h"
45 #include "ldpart.h"
46 
/*
 * Non-zero while no locale-specific collation tables are in effect.
 * Starts out as 1; __collate_load_tables() sets it to 1 for the "C" and
 * "POSIX" locales and to 0 when tables are loaded or reused from cache.
 */
47 int __collate_load_error = 1;
/*
 * Set by __collate_load_tables(): 1 if at least one substitution-table
 * entry is something other than the one-byte identity mapping, else 0.
 */
48 int __collate_substitute_nontrivial;
49 
/*
 * Per-byte replacement strings, indexed by unsigned byte value; read by
 * __collate_substitute().
 */
50 char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
/*
 * Per-byte primary/secondary weights, indexed by unsigned byte value;
 * __collate_lookup() falls back to this table when no chain matches.
 */
51 struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
/*
 * Array of multi-byte chain entries installed by __collate_load_tables().
 * __collate_lookup() scans it until it reaches an entry whose str[0] is NUL.
 */
52 struct __collate_st_chain_pri *__collate_chain_pri_table;
53 
54 int
55 __collate_load_tables(const char *encoding)
56 {
57 	FILE *fp;
58 	int i, saverr, chains;
59 	uint32_t u32;
60 	char strbuf[STR_LEN], buf[PATH_MAX];
61 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
62 	static char collate_encoding[ENCODING_LEN + 1];
63 
64 	/* 'encoding' must be already checked. */
65 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
66 		__collate_load_error = 1;
67 		return (_LDP_CACHE);
68 	}
69 
70 	/*
71 	 * If the locale name is the same as our cache, use the cache.
72 	 */
73 	if (strcmp(encoding, collate_encoding) == 0) {
74 		__collate_load_error = 0;
75 		return (_LDP_CACHE);
76 	}
77 
78 	/*
79 	 * Slurp the locale file into the cache.
80 	 */
81 
82 	/* 'PathLocale' must be already set & checked. */
83 	/* Range checking not needed, encoding has fixed size */
84 	(void) strcpy(buf, _PathLocale);
85 	(void) strcat(buf, "/");
86 	(void) strcat(buf, encoding);
87 	(void) strcat(buf, "/LC_COLLATE");
88 	if ((fp = fopen(buf, "r")) == NULL)
89 		return (_LDP_ERROR);
90 
91 	if (fread(strbuf, sizeof (strbuf), 1, fp) != 1) {
92 		saverr = errno;
93 		(void) fclose(fp);
94 		errno = saverr;
95 		return (_LDP_ERROR);
96 	}
97 	chains = -1;
98 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
99 		chains = 0;
100 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
101 		chains = 1;
102 	if (chains < 0) {
103 		(void) fclose(fp);
104 		errno = EINVAL;
105 		return (_LDP_ERROR);
106 	}
107 	if (chains) {
108 		if (fread(&u32, sizeof (u32), 1, fp) != 1) {
109 			saverr = errno;
110 			(void) fclose(fp);
111 			errno = saverr;
112 			return (_LDP_ERROR);
113 		}
114 		if ((chains = (int)ntohl(u32)) < 1) {
115 			(void) fclose(fp);
116 			errno = EINVAL;
117 			return (_LDP_ERROR);
118 		}
119 	} else
120 		chains = TABLE_SIZE;
121 
122 	if ((TMP_substitute_table =
123 	    malloc(sizeof (__collate_substitute_table))) == NULL) {
124 		saverr = errno;
125 		(void) fclose(fp);
126 		errno = saverr;
127 		return (_LDP_ERROR);
128 	}
129 	if ((TMP_char_pri_table =
130 	    malloc(sizeof (__collate_char_pri_table))) == NULL) {
131 		saverr = errno;
132 		free(TMP_substitute_table);
133 		(void) fclose(fp);
134 		errno = saverr;
135 		return (_LDP_ERROR);
136 	}
137 	if ((TMP_chain_pri_table =
138 	    malloc(sizeof (*__collate_chain_pri_table) * chains)) == NULL) {
139 		saverr = errno;
140 		free(TMP_substitute_table);
141 		free(TMP_char_pri_table);
142 		(void) fclose(fp);
143 		errno = saverr;
144 		return (_LDP_ERROR);
145 	}
146 
147 #define	FREAD(a, b, c, d) \
148 { \
149 	if (fread(a, b, c, d) != c) { \
150 		saverr = errno; \
151 		free(TMP_substitute_table); \
152 		free(TMP_char_pri_table); \
153 		free(TMP_chain_pri_table); \
154 		(void) fclose(d); \
155 		errno = saverr; \
156 		return (_LDP_ERROR); \
157 	} \
158 }
159 
160 	FREAD(TMP_substitute_table, sizeof (__collate_substitute_table), 1, fp);
161 	FREAD(TMP_char_pri_table, sizeof (__collate_char_pri_table), 1, fp);
162 	FREAD(TMP_chain_pri_table,
163 	    sizeof (*__collate_chain_pri_table), chains, fp);
164 	(void) fclose(fp);
165 
166 	(void) strcpy(collate_encoding, encoding);
167 	if (__collate_substitute_table_ptr != NULL)
168 		free(__collate_substitute_table_ptr);
169 	__collate_substitute_table_ptr = TMP_substitute_table;
170 	if (__collate_char_pri_table_ptr != NULL)
171 		free(__collate_char_pri_table_ptr);
172 	__collate_char_pri_table_ptr = TMP_char_pri_table;
173 	for (i = 0; i < UCHAR_MAX + 1; i++) {
174 		__collate_char_pri_table[i].prim =
175 		    ntohl(__collate_char_pri_table[i].prim);
176 		__collate_char_pri_table[i].sec =
177 		    ntohl(__collate_char_pri_table[i].sec);
178 	}
179 	if (__collate_chain_pri_table != NULL)
180 		free(__collate_chain_pri_table);
181 	__collate_chain_pri_table = TMP_chain_pri_table;
182 	for (i = 0; i < chains; i++) {
183 		__collate_chain_pri_table[i].prim =
184 		    ntohl(__collate_chain_pri_table[i].prim);
185 		__collate_chain_pri_table[i].sec =
186 		    ntohl(__collate_chain_pri_table[i].sec);
187 	}
188 	__collate_substitute_nontrivial = 0;
189 	for (i = 0; i < UCHAR_MAX + 1; i++) {
190 		if (__collate_substitute_table[i][0] != i ||
191 		    __collate_substitute_table[i][1] != 0) {
192 			__collate_substitute_nontrivial = 1;
193 			break;
194 		}
195 	}
196 	__collate_load_error = 0;
197 
198 	return (_LDP_LOADED);
199 }
200 
201 char *
202 __collate_substitute(const char *str)
203 {
204 	int dest_len, len, nlen;
205 	int delta;
206 	char *dest_str = NULL;
207 	uchar_t *s = (uchar_t *)str;
208 
209 	if (s == NULL || *s == '\0') {
210 		return (strdup(""));
211 	}
212 
213 	delta = strlen(str);
214 	delta += delta / 8;
215 	dest_str = malloc(dest_len = delta);
216 	if (dest_str == NULL)
217 		return (NULL);
218 	len = 0;
219 	while (*s) {
220 		nlen = len + strlen(__collate_substitute_table[*s]);
221 		if (dest_len <= nlen) {
222 			char *new_str;
223 			new_str = realloc(dest_str, dest_len = nlen + delta);
224 			if (new_str == NULL) {
225 				free(dest_str);
226 				return (NULL);
227 			}
228 			dest_str = new_str;
229 		}
230 		(void) strcpy(dest_str + len,
231 		    (char *)__collate_substitute_table[*s++]);
232 		len = nlen;
233 	}
234 	return (dest_str);
235 }
236 
237 void
238 __collate_lookup(const char *t, int *len, int *prim, int *sec)
239 {
240 	struct __collate_st_chain_pri *p2;
241 
242 	*len = 1;
243 	*prim = *sec = 0;
244 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
245 		if (*t == p2->str[0] &&
246 		    strncmp(t, (char *)p2->str, strlen((char *)p2->str)) == 0) {
247 			*len = strlen((char *)p2->str);
248 			*prim = p2->prim;
249 			*sec = p2->sec;
250 			return;
251 		}
252 	}
253 	*prim = __collate_char_pri_table[(uchar_t)*t].prim;
254 	*sec = __collate_char_pri_table[(uchar_t)*t].sec;
255 }
256