xref: /titanic_50/usr/src/lib/libast/common/comp/setlocale.c (revision e65e5c2d2f32a99e8c5f740cabae9075dab03ce7)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2009 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * setlocale() intercept
26  * maintains a bitmask of non-default categories
27  * and a permanent locale namespace for pointer comparison
28  * and persistent private data for locale related functions
29  */
30 
31 #include <ast_standards.h>
32 
33 #include "lclib.h"
34 
35 #include <ast_wchar.h>
36 #include <ctype.h>
37 #include <mc.h>
38 #include <namval.h>
39 
40 #if ( _lib_wcwidth || _lib_wctomb ) && _hdr_wctype
41 #include <wctype.h>
42 #endif
43 
44 #if _lib_wcwidth
45 #undef	wcwidth
46 #else
47 #define wcwidth			0
48 #endif
49 
50 #if _lib_wctomb
51 #undef	wctomb
52 #else
53 #define wctomb			0
54 #endif
55 
56 #ifdef mblen
57 #undef	mblen
58 extern int		mblen(const char*, size_t);
59 #endif
60 
61 #undef	mbtowc
62 #undef	setlocale
63 #undef	strcmp
64 #undef	strcoll
65 #undef	strxfrm
66 #undef	valid
67 
68 #ifndef AST_LC_CANONICAL
69 #define AST_LC_CANONICAL	LC_abbreviated
70 #endif
71 
72 #ifndef AST_LC_test
73 #define AST_LC_test		(1L<<27)
74 #endif
75 
76 #if _UWIN
77 
78 #include <ast_windows.h>
79 
80 #undef	_lib_setlocale
81 #define _lib_setlocale		1
82 
83 #define setlocale(c,l)		native_setlocale(c,l)
84 
85 extern char*			uwin_setlocale(int, const char*);
86 
87 /*
88  * convert locale to native locale name in buf
89  */
90 
91 static char*
92 native_locale(const char* locale, char* buf, size_t siz)
93 {
94 	Lc_t*				lc;
95 	const Lc_attribute_list_t*	ap;
96 	int				i;
97 	unsigned long			lcid;
98 	unsigned long			lang;
99 	unsigned long			ctry;
100 	char				lbuf[128];
101 	char				cbuf[128];
102 
103 	if (locale && *locale)
104 	{
105 		if (!(lc = lcmake(locale)))
106 			return 0;
107 		lang = lc->language->index;
108 		ctry = 0;
109 		for (ap = lc->attributes; ap; ap = ap->next)
110 			if (ctry = ap->attribute->index)
111 				break;
112 		if (!ctry)
113 		{
114 			for (i = 0; i < elementsof(lc->territory->languages); i++)
115 				if (lc->territory->languages[i] == lc->language)
116 				{
117 					ctry = lc->territory->indices[i];
118 					break;
119 				}
120 			if (!ctry)
121 			{
122 				if (!lang)
123 					return 0;
124 				ctry = SUBLANG_DEFAULT;
125 			}
126 		}
127 		lcid = MAKELCID(MAKELANGID(lang, ctry), SORT_DEFAULT);
128 	}
129 	else
130 		lcid = GetUserDefaultLCID();
131 	if (GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, lbuf, sizeof(lbuf)) <= 0 ||
132 	    GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, cbuf, sizeof(cbuf)) <= 0)
133 		return 0;
134 	if (lc->charset->ms)
135 		sfsprintf(buf, siz, "%s_%s.%s", lbuf, cbuf, lc->charset->ms);
136 	else
137 		sfsprintf(buf, siz, "%s_%s", lbuf, cbuf);
138 	return buf;
139 }
140 
141 /*
142  * locale!=0 here
143  */
144 
145 static char*
146 native_setlocale(int category, const char* locale)
147 {
148 	char*		usr;
149 	char*		sys;
150 	char		buf[256];
151 
152 	if (!(usr = native_locale(locale, buf, sizeof(buf))))
153 		return 0;
154 
155 	/*
156 	 * win32 doesn't have LC_MESSAGES
157 	 */
158 
159 	if (category == LC_MESSAGES)
160 		return (char*)locale;
161 	sys = uwin_setlocale(category, usr);
162 	if (ast.locale.set & AST_LC_debug)
163 		sfprintf(sfstderr, "locale uwin %17s %-24s %-24s\n", lc_categories[lcindex(category, 0)].name, usr, sys);
164 	return sys;
165 }
166 
167 #else
168 
169 #define native_locale(a,b,c)	((char*)0)
170 
171 #endif
172 
173 /*
174  * LC_COLLATE and LC_CTYPE native support
175  */
176 
177 #if !_lib_mbtowc || MB_LEN_MAX <= 1
178 #define mblen		0
179 #define mbtowc		0
180 #endif
181 
182 #if !_lib_strcoll
183 #define	strcoll		0
184 #endif
185 
186 #if !_lib_strxfrm
187 #define	strxfrm		0
188 #endif
189 
190 /*
191  * LC_COLLATE and LC_CTYPE debug support
192  *
193  * multibyte debug encoding
194  *
195  *	DL0 [ '0' .. '4' ] c1 ... c4 DR0
196  *	DL1 [ '0' .. '4' ] c1 ... c4 DR1
197  *
198  * with these ligatures
199  *
200  *	ch CH sst SST
201  *
202  * and private collation order
203  *
204  * wide character display width is the low order 3 bits
205  * wctomb() uses DL1...DR1
206  */
207 
/*
 * value stored in ast.mb_cur_max for the debug locale
 * raised to MB_LEN_MAX when that is larger
 */
208 #define DEBUG_MB_CUR_MAX	7
209 
210 #if DEBUG_MB_CUR_MAX < MB_LEN_MAX
211 #undef	DEBUG_MB_CUR_MAX
212 #define DEBUG_MB_CUR_MAX	MB_LEN_MAX
213 #endif
214 
/*
 * DL0/DR0 and DL1/DR1 are the two left/right delimiter pairs
 * recognized by debug_mbtowc(); debug_wctomb() emits DL1/DR1
 */
215 #define DL0	'<'
216 #define DL1	0xab		/* 8-bit mini << on xterm	*/
217 #define DR0	'>'
218 #define DR1	0xbb		/* 8-bit mini >> on xterm	*/
219 
/*
 * layout of a debug wide character: DB bits are used in all,
 * up to DX embedded chars of DC bits each above a DZ bit count field
 */
220 #define DB	((int)sizeof(wchar_t)*8-1)
221 #define DC	7		/* wchar_t embedded char bits	*/
222 #define DX	(DB/DC)		/* wchar_t max embedded chars	*/
223 #define DZ	(DB-DX*DC+1)	/* wchar_t embedded size bits	*/
224 #define DD	3		/* # mb delimiter chars <n...>	*/
225 
/*
 * private collation weights for the debug locale, indexed by byte value
 * and consulted by debug_strxfrm()
 *
 * bytes 0..31 and 128..255 keep their own value as weight
 * within 32..127 the digits '0'..'9' get the lowest weights (32..41),
 * followed by '.', '+', '-', then 'a'..'z' (45..70), then 'A'..'Z'
 * (71..96); the remaining punctuation fills 97..127
 */
226 static unsigned char debug_order[] =
227 {
228 	  0,   1,   2,   3,   4,   5,   6,   7,
229 	  8,   9,  10,  11,  12,  13,  14,  15,
230 	 16,  17,  18,  19,  20,  21,  22,  23,
231 	 24,  25,  26,  27,  28,  29,  30,  31,
232 	 99, 100, 101, 102,  98, 103, 104, 105,
233 	106, 107, 108,  43, 109,  44,  42, 110,
234 	 32,  33,  34,  35,  36,  37,  38,  39,
235 	 40,  41, 111, 112, 113, 114, 115, 116,
236 	117,  71,  72,  73,  74,  75,  76,  77,
237 	 78,  79,  80,  81,  82,  83,  84,  85,
238 	 86,  87,  88,  89,  90,  91,  92,  93,
239 	 94,  95,  96, 118, 119, 120, 121,  97,
240 	122,  45,  46,  47,  48,  49,  50,  51,
241 	 52,  53,  54,  55,  56,  57,  58,  59,
242 	 60,  61,  62,  63,  64,  65,  66,  67,
243 	 68,  69,  70, 123, 124, 125, 126, 127,
244 	128, 129, 130, 131, 132, 133, 134, 135,
245 	136, 137, 138, 139, 140, 141, 142, 143,
246 	144, 145, 146, 147, 148, 149, 150, 151,
247 	152, 153, 154, 155, 156, 157, 158, 159,
248 	160, 161, 162, 163, 164, 165, 166, 167,
249 	168, 169, 170, 171, 172, 173, 174, 175,
250 	176, 177, 178, 179, 180, 181, 182, 183,
251 	184, 185, 186, 187, 188, 189, 190, 191,
252 	192, 193, 194, 195, 196, 197, 198, 199,
253 	200, 201, 202, 203, 204, 205, 206, 207,
254 	208, 209, 210, 211, 212, 213, 214, 215,
255 	216, 217, 218, 219, 220, 221, 222, 223,
256 	224, 225, 226, 227, 228, 229, 230, 231,
257 	232, 233, 234, 235, 236, 237, 238, 239,
258 	240, 241, 242, 243, 244, 245, 246, 247,
259 	248, 249, 250, 251, 252, 253, 254, 255,
260 };
261 
262 static int
263 debug_mbtowc(register wchar_t* p, register const char* s, size_t n)
264 {
265 	register const char*	q;
266 	register const char*	r;
267 	register int		w;
268 	register int		dr;
269 	wchar_t			c;
270 
271 	if (n < 1)
272 		return -1;
273 	if (!s || !*s)
274 		return 0;
275 	switch (((unsigned char*)s)[0])
276 	{
277 	case DL0:
278 		dr = DR0;
279 		break;
280 	case DL1:
281 		dr = DR1;
282 		break;
283 	default:
284 		if (p)
285 			*p = ((unsigned char*)s)[0] & ((1<<DC)-1);
286 		return 1;
287 	}
288 	if (n < 2)
289 		return -1;
290 	if ((w = ((unsigned char*)s)[1]) == ((unsigned char*)s)[0])
291 	{
292 		if (p)
293 			*p = w;
294 		return 2;
295 	}
296 	if (w < '0' || w > ('0' + DX))
297 		return -1;
298 	if ((w -= '0' - DD) > n)
299 		return -1;
300 	r = s + w - 1;
301 	q = s += 2;
302 	while (q < r && *q)
303 		q++;
304 	if (q != r || *((unsigned char*)q) != dr)
305 		return -1;
306 	if (p)
307 	{
308 		c = 0;
309 		while (--q >= s)
310 		{
311 			c <<= DC;
312 			c |= *((unsigned char*)q);
313 		}
314 		c <<= DZ;
315 		c |= w - DD;
316 		*p = c;
317 	}
318 	return w;
319 }
320 
321 static int
322 debug_wctomb(char* s, wchar_t c)
323 {
324 	int	w;
325 	int	i;
326 	int	k;
327 
328 	w = 0;
329 	if (c >= 0 && c <= UCHAR_MAX)
330 	{
331 		w++;
332 		if (s)
333 			*s = c;
334 	}
335 	else if ((i = c & ((1<<DZ)-1)) > DX)
336 		return -1;
337 	else
338 	{
339 		w++;
340 		if (s)
341 			*s++ = DL1;
342 		c >>= DZ;
343 		w++;
344 		if (s)
345 			*s++ = i + '0';
346 		while (i--)
347 		{
348 			w++;
349 			if (s)
350 				*s++ = (k = c & ((1<<DC)-1)) ? k : '?';
351 			c >>= DC;
352 		}
353 		w++;
354 		if (s)
355 			*s++ = DR1;
356 	}
357 	return w;
358 }
359 
/*
 * debug locale mblen()
 * same as debug_mbtowc() with the wide character discarded
 */

static int
debug_mblen(const char* s, size_t n)
{
	int	len;

	len = debug_mbtowc((wchar_t*)0, s, n);
	return len;
}
365 
366 static int
367 debug_wcwidth(wchar_t c)
368 {
369 	if (c >= 0 && c <= UCHAR_MAX)
370 		return 1;
371 	if ((c &= ((1<<DZ)-1)) > DX)
372 		return -1;
373 	return c + DD;
374 }
375 
376 static size_t
377 debug_strxfrm(register char* t, register const char* s, size_t n)
378 {
379 	register const char*	q;
380 	register const char*	r;
381 	register char*		e;
382 	char*			o;
383 	register size_t		z;
384 	register int		w;
385 
386 	o = t;
387 	z = 0;
388 	if (e = t)
389 		e += n;
390 	while (s[0])
391 	{
392 		if ((((unsigned char*)s)[0] == DL0 || ((unsigned char*)s)[0] == DL1) && (w = s[1]) >= '0' && w <= ('0' + DC))
393 		{
394 			w -= '0';
395 			q = s + 2;
396 			r = q + w;
397 			while (q < r && *q)
398 				q++;
399 			if (*((unsigned char*)q) == DR0 || *((unsigned char*)q) == DR1)
400 			{
401 				if (t)
402 				{
403 					for (q = s + 2; q < r; q++)
404 						if (t < e)
405 							*t++ = debug_order[*q];
406 					while (w++ < DX)
407 						if (t < e)
408 							*t++ = 1;
409 				}
410 				s = r + 1;
411 				z += DX;
412 				continue;
413 			}
414 		}
415 		if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'h' || s[1] == 'H'))
416 		{
417 			if (t)
418 			{
419 				if (t < e)
420 					*t++ = debug_order[s[0]];
421 				if (t < e)
422 					*t++ = debug_order[s[1]];
423 				if (t < e)
424 					*t++ = 1;
425 				if (t < e)
426 					*t++ = 1;
427 			}
428 			s += 2;
429 			z += DX;
430 			continue;
431 		}
432 		if ((s[0] == 's' || s[0] == 'S') && (s[1] == 's' || s[1] == 'S') && (s[2] == 't' || s[2] == 'T'))
433 		{
434 			if (t)
435 			{
436 				if (t < e)
437 					*t++ = debug_order[s[0]];
438 				if (t < e)
439 					*t++ = debug_order[s[1]];
440 				if (t < e)
441 					*t++ = debug_order[s[2]];
442 				if (t < e)
443 					*t++ = 1;
444 			}
445 			s += 3;
446 			z += DX;
447 			continue;
448 		}
449 		if (t)
450 		{
451 			if (t < e)
452 				*t++ = debug_order[s[0]];
453 			if (t < e)
454 				*t++ = 1;
455 			if (t < e)
456 				*t++ = 1;
457 			if (t < e)
458 				*t++ = 1;
459 		}
460 		s++;
461 		z += DX;
462 	}
463 	if (!t)
464 		return z;
465 	if (t < e)
466 		*t = 0;
467 	return t - o;
468 }
469 
/*
 * debug locale strcoll()
 * both strings are transformed by debug_strxfrm() into fixed size
 * buffers (longer transforms are truncated) and then byte compared
 */

static int
debug_strcoll(const char* a, const char* b)
{
	char	xa[1024];
	char	xb[1024];

	/*
	 * the transform is limited to size-1 bytes so the last
	 * byte always holds the terminator
	 */

	xa[sizeof(xa) - 1] = 0;
	xb[sizeof(xb) - 1] = 0;
	debug_strxfrm(xa, a, sizeof(xa) - 1);
	debug_strxfrm(xb, b, sizeof(xb) - 1);
	return strcmp(xa, xb);
}
482 
/*
 * default locale
 *
 * width is 1 for codes 0..255 that are not control characters
 * and -1 for everything else
 */

static int
default_wcwidth(wchar_t w)
{
	if (w < 0 || w > 255)
		return -1;
	if (iscntrl(w))
		return -1;
	return 1;
}
492 
493 /*
494  * called when LC_COLLATE initialized or changes
495  */
496 
497 static int
498 set_collate(Lc_category_t* cp)
499 {
500 	if (locales[cp->internal]->flags & LC_debug)
501 	{
502 		ast.collate = debug_strcoll;
503 		ast.mb_xfrm = debug_strxfrm;
504 	}
505 	else if (locales[cp->internal]->flags & LC_default)
506 	{
507 		ast.collate = strcmp;
508 		ast.mb_xfrm = 0;
509 	}
510 	else
511 	{
512 		ast.collate = strcoll;
513 		ast.mb_xfrm = strxfrm;
514 	}
515 	return 0;
516 }
517 
518 /*
519  * workaround the interesting sjis that translates unshifted 7 bit ascii!
520  */
521 
522 #if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc
523 
524 #define mb_state_zero	((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
525 #define mb_state	((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
526 
527 static int
528 sjis_mbtowc(register wchar_t* p, register const char* s, size_t n)
529 {
530 	if (n && p && s && (*s == '\\' || *s == '~') && !memcmp(mb_state, mb_state_zero, sizeof(mbstate_t)))
531 	{
532 		*p = *s;
533 		return 1;
534 	}
535 	return mbrtowc(p, s, n, mb_state);
536 }
537 
538 #endif
539 
/* the utf8 ctype setup uses the native wctomb() for wide => multibyte */
540 #define utf8_wctomb	wctomb
541 
/*
 * indexed by sequence length as given by utf8tab[]
 * utf8_mbtowc() rejects a decoded value that has none of these bits
 * set, i.e. one that would have fit in a shorter sequence
 */
542 static const uint32_t		utf8mask[] =
543 {
544 	0x00000000,
545 	0x00000000,
546 	0xffffff80,
547 	0xfffff800,
548 	0xffff0000,
549 	0xffe00000,
550 	0xfc000000,
551 };
552 
/*
 * indexed by the first byte of a sequence, gives the sequence length
 *	0x01..0x7f	1
 *	0xc0..0xdf	2
 *	0xe0..0xef	3
 *	0xf0..0xf7	4
 *	0xf8..0xfb	5
 *	0xfc..0xfd	6
 * 0 marks the 0 byte and the continuation bytes 0x80..0xbf,
 * -1 marks 0xfe and 0xff
 */
553 static const signed char	utf8tab[256] =
554 {
555 	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
556 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
557 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
558 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
559 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
560 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
561 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
562 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
563 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
568 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
569 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
570 	4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6,-1,-1,
571 };
572 
573 static int
574 utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
575 {
576 	register unsigned char*	sp = (unsigned char*)str;
577 	register int		m;
578 	register int		i;
579 	register int		c;
580 	register wchar_t	w = 0;
581 
582 	if (!sp || !n)
583 		return 0;
584 	if ((m = utf8tab[*sp]) > 0)
585 	{
586 		if (m > n)
587 			return -1;
588 		if (wp)
589 		{
590 			if (m == 1)
591 			{
592 				*wp = *sp;
593 				return 1;
594 			}
595 			w = *sp & ((1<<(8-m))-1);
596 			for (i = m - 1; i > 0; i--)
597 			{
598 				c = *++sp;
599 				if ((c&0xc0) != 0x80)
600 					goto invalid;
601 				w = (w<<6) | (c&0x3f);
602 			}
603 			if (!(utf8mask[m] & w) || w >= 0xd800 && (w <= 0xdfff || w >= 0xfffe && w <= 0xffff))
604 				goto invalid;
605 			*wp = w;
606 		}
607 		return m;
608 	}
609 	if (!*sp)
610 		return 0;
611  invalid:
612 #ifdef EILSEQ
613 	errno = EILSEQ;
614 #endif
615 	ast.mb_sync = (const char*)sp - str;
616 	return -1;
617 }
618 
/*
 * utf8 mblen()
 * decodes into a scratch wide character so that the
 * sequence is fully validated by utf8_mbtowc()
 */

static int
utf8_mblen(const char* str, size_t n)
{
	wchar_t		scratch;
	int		len;

	len = utf8_mbtowc(&scratch, str, n);
	return len;
}
626 
627 /*
628  * called when LC_CTYPE initialized or changes
629  */
630 
/*
 * install the multibyte function set in ast.mb_* for the LC_CTYPE
 * locale locales[cp->internal]; one of four sets is chosen:
 *
 *	debug		the debug_* functions
 *	single byte	default locale, MB_CUR_MAX<=1, or mblen/mbtowc
 *			not available (they may be defined as 0)
 *	utf8		the private utf8_* decoders, unless the
 *			AST_LC_test option is set
 *	native		the system functions, with the sjis workaround
 *			when mb_state is defined and the probe fails
 *
 * ast.mb_sync is reset and 0 is always returned
 */
631 static int
632 set_ctype(Lc_category_t* cp)
633 {
634 	ast.mb_sync = 0;
635 	if (locales[cp->internal]->flags & LC_debug)
636 	{
637 		ast.mb_cur_max = DEBUG_MB_CUR_MAX;
638 		ast.mb_len = debug_mblen;
639 		ast.mb_towc = debug_mbtowc;
640 		ast.mb_width = debug_wcwidth;
641 		ast.mb_conv = debug_wctomb;
642 	}
	/*
	 * the assignments inside this condition are deliberate:
	 * when it is false ast.mb_cur_max, ast.mb_len and ast.mb_towc
	 * already hold the native values used by the branches below
	 */
643 	else if ((locales[cp->internal]->flags & LC_default) || (ast.mb_cur_max = MB_CUR_MAX) <= 1 || !(ast.mb_len = mblen) || !(ast.mb_towc = mbtowc))
644 	{
645 		ast.mb_cur_max = 1;
646 		ast.mb_len = 0;
647 		ast.mb_towc = 0;
648 		ast.mb_width = default_wcwidth;
649 		ast.mb_conv = 0;
650 	}
651 	else if ((locales[cp->internal]->flags & LC_utf8) && !(ast.locale.set & AST_LC_test))
652 	{
653 		ast.mb_cur_max = 6;
654 		ast.mb_len = utf8_mblen;
655 		ast.mb_towc = utf8_mbtowc;
		/* wcwidth is defined as 0 when the library lacks it */
656 		if (!(ast.mb_width = wcwidth))
657 			ast.mb_width = default_wcwidth;
658 		ast.mb_conv = utf8_wctomb;
659 	}
660 	else
661 	{
662 		if (!(ast.mb_width = wcwidth))
663 			ast.mb_width = default_wcwidth;
664 		ast.mb_conv = wctomb;
665 #ifdef mb_state
666 		{
667 			/*
668 			 * check for sjis that translates unshifted 7 bit ascii!
669 			 */
670 
671 			char*	s;
672 			char	buf[2];
673 
674 			mbinit();
675 			buf[1] = 0;
676 			*(s = buf) = '\\';
			/* a backslash that does not convert to itself selects the workaround */
677 			if (mbchar(s) != buf[0])
678 			{
679 				memcpy(mb_state, mb_state_zero, sizeof(mbstate_t));
680 				ast.mb_towc = sjis_mbtowc;
681 			}
682 		}
683 #endif
684 	}
	/* trace the selection when the debug or setlocale option is set */
685 	if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
686 		sfprintf(sfstderr, "locale info %17s MB_CUR_MAX=%d%s%s%s%s\n"
687 			, cp->name
688 			, ast.mb_cur_max
689 			, ast.mb_len == debug_mblen ? " debug_mblen" : ast.mb_len == mblen ? " mblen" : ""
690 			, ast.mb_towc == debug_mbtowc ? " debug_mbtowc" : ast.mb_towc == mbtowc ? " mbtowc"
691 #ifdef mb_state
692 				: ast.mb_towc == sjis_mbtowc ? " sjis_mbtowc"
693 #endif
694 				: ""
695 			, ast.mb_width == debug_wcwidth ? " debug_wcwidth" : ast.mb_width == wcwidth ? " wcwidth" : ast.mb_width == default_wcwidth ? " default_wcwidth" : ""
696 			, ast.mb_conv == debug_wctomb ? " debug_wctomb" : ast.mb_conv == wctomb ? " wctomb" : ""
697 			);
698 	return 0;
699 }
700 
701 /*
702  * called when LC_NUMERIC initialized or changes
703  */
704 
705 static int
706 set_numeric(Lc_category_t* cp)
707 {
708 	register int		category = cp->internal;
709 	struct lconv*		lp;
710 	Lc_numeric_t*		dp;
711 
712 	static Lc_numeric_t	default_numeric = { '.', -1 };
713 
714 	if (!LCINFO(category)->data)
715 	{
716 		if ((lp = localeconv()) && (dp = newof(0, Lc_numeric_t, 1, 0)))
717 		{
718 			dp->decimal = lp->decimal_point && *lp->decimal_point ? *(unsigned char*)lp->decimal_point : '.';
719 			dp->thousand = lp->thousands_sep && *lp->thousands_sep ? *(unsigned char*)lp->thousands_sep : -1;
720 		}
721 		else
722 			dp = &default_numeric;
723 		LCINFO(category)->data = (void*)dp;
724 		if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
725 			sfprintf(sfstderr, "locale info %17s decimal '%c' thousands '%c'\n", lc_categories[category].name, dp->decimal, dp->thousand >= 0 ? dp->thousand : 'X');
726 	}
727 	return 0;
728 }
729 
730 /*
731  * this table is indexed by AST_LC_[A-Z]*
732  */
733 
/*
 * one entry per locale category
 * each row gives the category name, the system LC_* value, the
 * AST_LC_* value and the function to call after the category is set
 * (0 if none); the rows presumably initialize the name, external,
 * internal and setf members in that order -- the struct is declared
 * elsewhere, confirm against its declaration
 */
734 Lc_category_t		lc_categories[] =
735 {
736 { "LC_ALL",           LC_ALL,           AST_LC_ALL,           0               },
737 { "LC_COLLATE",       LC_COLLATE,       AST_LC_COLLATE,       set_collate     },
738 { "LC_CTYPE",         LC_CTYPE,         AST_LC_CTYPE,         set_ctype       },
739 { "LC_MESSAGES",      LC_MESSAGES,      AST_LC_MESSAGES,      0               },
740 { "LC_MONETARY",      LC_MONETARY,      AST_LC_MONETARY,      0               },
741 { "LC_NUMERIC",       LC_NUMERIC,       AST_LC_NUMERIC,       set_numeric     },
742 { "LC_TIME",          LC_TIME,          AST_LC_TIME,          0               },
743 { "LC_IDENTIFICATION",LC_IDENTIFICATION,AST_LC_IDENTIFICATION,0               },
744 { "LC_ADDRESS",       LC_ADDRESS,       AST_LC_ADDRESS,       0               },
745 { "LC_NAME",          LC_NAME,          AST_LC_NAME,          0               },
746 { "LC_TELEPHONE",     LC_TELEPHONE,     AST_LC_TELEPHONE,     0               },
747 { "LC_XLITERATE",     LC_XLITERATE,     AST_LC_XLITERATE,     0               },
748 { "LC_MEASUREMENT",   LC_MEASUREMENT,   AST_LC_MEASUREMENT,   0               },
749 { "LC_PAPER",         LC_PAPER,         AST_LC_PAPER,         0               },
750 };
751 
/*
 * locales made from the LANG and LC_ALL environment variables by
 * _ast_setlocale(); 0 when the variable is unset or empty
 */
752 static Lc_t*		lang;
753 static Lc_t*		lc_all;
754 
/* name => ast.locale.set bit, the table element handed to stropt() */
755 typedef struct Unamval_s
756 {
757 	char*		name;
758 	unsigned int	value;
759 } Unamval_t;
760 
/*
 * option names accepted in the LC_OPTIONS environment variable
 * the list ends with a 0 name
 */
761 static const Unamval_t	options[] =
762 {
763 	"debug",		AST_LC_debug,
764 	"find",			AST_LC_find,
765 	"setlocale",		AST_LC_setlocale,
766 	"test",			AST_LC_test,
767 	"translate",		AST_LC_translate,
768 	0,			0
769 };
770 
771 /*
772  * called by stropt() to set options
773  */
774 
775 static int
776 setopt(void* a, const void* p, int n, const char* v)
777 {
778 	if (p)
779 	{
780 		if (n)
781 			ast.locale.set |= ((Unamval_t*)p)->value;
782 		else
783 			ast.locale.set &= ~((Unamval_t*)p)->value;
784 	}
785 	return 0;
786 }
787 
788 #if !_lib_setlocale
789 
790 #define setlocale(c,l)		default_setlocale(c,l)
791 
792 static char*
793 default_setlocale(int category, const char* locale)
794 {
795 	Lc_t*		lc;
796 
797 	if (locale)
798 	{
799 		if (!(lc = lcmake(locale)) || !(lc->flags & LC_default))
800 			return 0;
801 		locales[0]->flags &= ~lc->flags;
802 		locales[1]->flags &= ~lc->flags;
803 		return lc->name;
804 	}
805 	return (locales[1]->flags & (1<<category)) ? locales[1]->name : locales[0]->name;
806 }
807 
808 #endif
809 
810 /*
811  * set a single AST_LC_* locale category
812  * the caller must validate category
813  * lc==0 restores the previous state
814  */
815 
/*
 * make lc the locale for one AST_LC_* category
 *
 *	category	index into locales[] and lc_categories[]
 *	lc		locale to install; 0 selects, in order, lc_all,
 *			the category's prev locale, lang, and finally
 *			lcmake(NiL)
 *
 * nothing is done if the category already has that locale
 * returns the locale name, or 0 if the system rejected the locale and
 * no local override exists, or if the category's setf function failed
 */
816 static char*
817 single(int category, Lc_t* lc)
818 {
819 	const char*	sys;
820 	int		i;
821 
822 	if (!lc && !(lc = lc_all) && !(lc = lc_categories[category].prev) && !(lc = lang))
823 		lc = lcmake(NiL);
824 	if (locales[category] != lc)
825 	{
		/*
		 * external == -internal: no system call is made for this
		 * category; it is accepted only if some other category
		 * already uses lc
		 * NOTE(review): presumably this marks categories the
		 * system does not provide -- confirm in lclib.h
		 */
826 		if (lc_categories[category].external == -lc_categories[category].internal)
827 		{
828 			sys = 0;
829 			for (i = 1; i < AST_LC_COUNT; i++)
830 				if (locales[i] == lc)
831 				{
832 					sys = (char*)lc->name;
833 					break;
834 				}
835 		}
		/* debug and local locales run on top of the lcmake(NiL) system locale */
836 		else if (lc->flags & (LC_debug|LC_local))
837 			sys = setlocale(lc_categories[category].external, lcmake(NiL)->name);
		/*
		 * try the name, then the code if it differs from the name,
		 * then the bare language code if it differs from the code
		 */
838 		else if (!(sys = setlocale(lc_categories[category].external, lc->name)) &&
839 			 (streq(lc->name, lc->code) || !(sys = setlocale(lc_categories[category].external, lc->code))) &&
840 			 !streq(lc->code, lc->language->code))
841 				sys = setlocale(lc_categories[category].external, lc->language->code);
842 		if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
843 			sfprintf(sfstderr, "locale set  %17s %-24s %-24s\n", lc_categories[category].name, lc->name, sys);
844 		if (!sys)
845 		{
846 			/*
847 			 * check for local override
848 			 * currently this means an LC_MESSAGES dir exists
849 			 */
850 
			/* the mcfind() lookup is done once per locale and cached in flags */
851 			if (!(lc->flags & LC_checked))
852 			{
853 				char	path[PATH_MAX];
854 
855 				if (mcfind(path, lc->code, NiL, LC_MESSAGES, 0))
856 					lc->flags |= LC_local;
857 				lc->flags |= LC_checked;
858 			}
859 			if (!(lc->flags & LC_local))
860 				return 0;
861 			if (lc_categories[category].external != -lc_categories[category].internal)
862 				setlocale(lc_categories[category].external, lcmake(NiL)->name);
863 		}
864 		locales[category] = lc;
		/*
		 * NOTE(review): on setf failure the category is set to prev,
		 * which may be 0, not to the locale it had on entry -- confirm
		 */
865 		if (lc_categories[category].setf && (*lc_categories[category].setf)(&lc_categories[category]))
866 		{
867 			locales[category] = lc_categories[category].prev;
868 			return 0;
869 		}
		/*
		 * keep the non-default category bitmask current; for
		 * LC_MESSAGES the names "en" and "en_U*" count as default
		 */
870 		if ((lc->flags & LC_default) || category == AST_LC_MESSAGES && lc->name[0] == 'e' && lc->name[1] == 'n' && (lc->name[2] == 0 || lc->name[2] == '_' && lc->name[3] == 'U'))
871 			ast.locale.set &= ~(1<<category);
872 		else
873 			ast.locale.set |= (1<<category);
874 	}
875 	return (char*)lc->name;
876 }
877 
878 /*
879  * set composite AST_LC_ALL locale categories
880  * return <0:composite-error 0:not-composite >0:composite-ok
881  */
882 
883 static int
884 composite(register const char* s, int initialize)
885 {
886 	register const char*	t;
887 	register int		i;
888 	register int		j;
889 	register int		k;
890 	int			n;
891 	int			m;
892 	const char*		w;
893 	Lc_t*			p;
894 	int			cat[AST_LC_COUNT];
895 	int			stk[AST_LC_COUNT];
896 	char			buf[PATH_MAX / 2];
897 
898 	k = n = 0;
899 	while (s[0] == 'L' && s[1] == 'C' && s[2] == '_')
900 	{
901 		n++;
902 		j = 0;
903 		w = s;
904 		for (i = 1; i < AST_LC_COUNT; i++)
905 		{
906 			s = w;
907 			t = lc_categories[i].name;
908 			while (*t && *s++ == *t++);
909 			if (!*t && *s++ == '=')
910 			{
911 				cat[j++] = i;
912 				if (s[0] != 'L' || s[1] != 'C' || s[2] != '_')
913 					break;
914 				w = s;
915 				i = -1;
916 			}
917 		}
918 		for (s = w; *s && *s != '='; s++);
919 		if (!*s)
920 		{
921 			for (i = 0; i < k; i++)
922 				single(stk[i], NiL);
923 			return -1;
924 		}
925 		w = ++s;
926 		for (;;)
927 		{
928 			if (!*s)
929 			{
930 				p = lcmake(w);
931 				break;
932 			}
933 			else if (*s++ == ';')
934 			{
935 				if ((m = s - w - 1) >= sizeof(buf))
936 					m = sizeof(buf) - 1;
937 				memcpy(buf, w, m);
938 				buf[m] = 0;
939 				p = lcmake(buf);
940 				break;
941 			}
942 		}
943 		for (i = 0; i < j; i++)
944 			if (!initialize)
945 			{
946 				if (!single(cat[i], p))
947 				{
948 					for (i = 0; i < k; i++)
949 						single(stk[i], NiL);
950 					return -1;
951 				}
952 				stk[k++] = cat[i];
953 			}
954 			else if (!lc_categories[cat[i]].prev)
955 				lc_categories[cat[i]].prev = p;
956 	}
957 	while (s[0] == '/' && s[1] && n < (AST_LC_COUNT - 1))
958 	{
959 		n++;
960 		for (w = ++s; *s && *s != '/'; s++);
961 		if (!*s)
962 			p = lcmake(w);
963 		else
964 		{
965 			if ((j = s - w - 1) >= sizeof(buf))
966 				j = sizeof(buf) - 1;
967 			memcpy(buf, w, j);
968 			buf[j] = 0;
969 			p = lcmake(buf);
970 		}
971 		if (!initialize)
972 		{
973 			if (!single(n, p))
974 			{
975 				for (i = 1; i < n; i++)
976 					single(i, NiL);
977 				return -1;
978 			}
979 		}
980 		else if (!lc_categories[n].prev)
981 			lc_categories[n].prev = p;
982 	}
983 	return n;
984 }
985 
986 /*
987  * setlocale() intercept
988  *
989  * locale:
990  *	0	query
991  *	""	initialize from environment
992  *	"-"	unset
993  *	*	set
994  */
995 
/*
 * category is a system LC_* value, mapped to its AST_LC_* index by
 * lcindex(); 0 is returned if it is unknown
 *
 * locale==0 returns the name of the category's current locale
 * for AST_LC_ALL and AST_LC_LANG the result is the single common name
 * if all categories agree, otherwise a composite of the form
 * "LC_x=LC_y=name;LC_z=name" covering the non-default categories,
 * built in a string stream that is reused by later calls
 *
 * locale!=0 sets the category (see the locale forms above) and then
 * returns the same value a query would; 0 is returned on failure
 *
 * the first call also reads the LC_OPTIONS environment variable
 */
996 char*
997 _ast_setlocale(int category, const char* locale)
998 {
999 	register char*		s;
1000 	register int		i;
1001 	register int		j;
1002 	int			k;
1003 	char*			a;
1004 	Lc_t*			p;
1005 	int			cat[AST_LC_COUNT];
1006 
1007 	static Sfio_t*		sp;
1008 	static int		initialized;
1009 	static char		local[] = "local";
1010 
1011 	if ((category = lcindex(category, 0)) < 0)
1012 		return 0;
1013 	if (!locale)
1014 	{
1015 		/*
1016 		 * return the current state
1017 		 */
1018 
		/* the set paths below jump here to produce their return value */
1019 	compose:
1020 		if (category != AST_LC_ALL && category != AST_LC_LANG)
1021 			return (char*)locales[category]->name;
1022 		if (!sp && !(sp = sfstropen()))
1023 			return 0;
		/*
		 * cat[i] becomes the lowest category index that shares
		 * locales[i]; k counts the distinct locales
		 */
1024 		for (i = 1; i < AST_LC_COUNT; i++)
1025 			cat[i] = -1;
1026 		for (i = 1, k = 0; i < AST_LC_COUNT; i++)
1027 			if (cat[i] < 0)
1028 			{
1029 				k++;
1030 				cat[i] = i;
1031 				for (j = i + 1; j < AST_LC_COUNT; j++)
1032 					if (locales[j] == locales[i])
1033 						cat[j] = i;
1034 			}
1035 		if (k == 1)
1036 			return (char*)locales[1]->name;
		/* one "LC_x=LC_y=name" group per distinct non-default locale */
1037 		for (i = 1; i < AST_LC_COUNT; i++)
1038 			if (cat[i] >= 0 && !(locales[i]->flags & LC_default))
1039 			{
1040 				if (sfstrtell(sp))
1041 					sfprintf(sp, ";");
1042 				for (j = i, k = cat[i]; j < AST_LC_COUNT; j++)
1043 					if (cat[j] == k)
1044 					{
1045 						cat[j] = -1;
1046 						sfprintf(sp, "%s=", lc_categories[j].name);
1047 					}
1048 				sfprintf(sp, "%s", locales[i]->name);
1049 			}
1050 		if (!sfstrtell(sp))
1051 			return (char*)locales[0]->name;
1052 		return sfstruse(sp);
1053 	}
	/* first call with a non-null locale: read the LC_OPTIONS flags */
1054 	if (!ast.locale.serial++)
1055 	{
1056 		stropt(getenv("LC_OPTIONS"), options, sizeof(*options), setopt, NiL);
1057 		initialized = 0;
1058 	}
	/* "-" unsets: p==0 makes single() fall back to lc_all/prev/lang */
1059 	if (*locale)
1060 		p = streq(locale, "-") ? (Lc_t*)0 : lcmake(locale);
1061 	else if (!initialized)
1062 	{
1063 		char*	u;
1064 		char	tmp[256];
1065 
1066 		/*
1067 		 * initialize from the environment
1068 		 * precedence determined by X/Open
1069 		 */
1070 
		/*
		 * a value of "local" is replaced by the native locale name
		 * when native_locale() provides one (it is a 0 macro unless
		 * _UWIN); u caches that name across the lookups
		 */
1071 		u = 0;
1072 		if ((s = getenv("LANG")) && *s)
1073 		{
1074 			if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
1075 				s = u;
1076 			lang = lcmake(s);
1077 		}
1078 		else
1079 			lang = 0;
1080 		if ((s = getenv("LC_ALL")) && *s)
1081 		{
1082 			if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
1083 				s = u;
1084 			lc_all = lcmake(s);
1085 		}
1086 		else
1087 			lc_all = 0;
1088 		for (i = 1; i < AST_LC_COUNT; i++)
1089 			if ((s = getenv(lc_categories[i].name)) && *s)
1090 			{
1091 				if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
1092 					s = u;
1093 				lc_categories[i].prev = lcmake(s);
1094 			}
1095 			else
1096 				lc_categories[i].prev = 0;
		/* apply to every category; on failure restore the ones below i */
1097 		for (i = 1; i < AST_LC_COUNT; i++)
1098 			if (!single(i, lc_all ? lc_all : lc_categories[i].prev))
1099 			{
1100 				while (i--)
1101 					single(i, NiL);
1102 				return 0;
1103 			}
1104 		if (ast.locale.set & AST_LC_debug)
1105 			for (i = 1; i < AST_LC_COUNT; i++)
1106 				sfprintf(sfstderr, "locale env  %17s %16s %16s\n", lc_categories[i].name, locales[i]->name, lc_categories[i].prev ? lc_categories[i].prev->name : (char*)0);
1107 		initialized = 1;
1108 		goto compose;
1109 	}
	/* "" after initialization: the category's environment locale, else "C" */
1110 	else if (category == AST_LC_LANG || !(p = lc_categories[category].prev))
1111 		p = lcmake("C");
1112 	if (category == AST_LC_LANG)
1113 	{
		/* a new LANG only affects the categories when LC_ALL is not in effect */
1114 		if (lang != p)
1115 		{
1116 			lang = p;
1117 			if (!lc_all)
1118 				for (i = 1; i < AST_LC_COUNT; i++)
1119 					if (!single(i, lc_categories[i].prev))
1120 					{
1121 						while (i--)
1122 							single(i, NiL);
1123 						return 0;
1124 					}
1125 		}
1126 	}
1127 	else if (category != AST_LC_ALL)
1128 		return single(category, p);
1129 	else if ((i = composite(locale, 0)) < 0)
1130 		return 0;
	/*
	 * NOTE(review): composite() > 0 means the categories were already
	 * set one by one, yet control still reaches this branch and applies
	 * p = lcmake(locale) of the whole composite string to every
	 * category -- confirm this is intended
	 */
1131 	else if (lc_all != p)
1132 	{
1133 		lc_all = p;
1134 		for (i = 1; i < AST_LC_COUNT; i++)
1135 			if (!single(i, lc_all ? lc_all : lc_categories[i].prev))
1136 			{
1137 				while (i--)
1138 					single(i, NiL);
1139 				return 0;
1140 			}
1141 	}
1142 	goto compose;
1143 }
1143 }
1144