xref: /titanic_50/usr/src/lib/libast/common/comp/setlocale.c (revision 41afdfa77f9af46beb3aaab2eccc0d9afe660d31)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * setlocale() intercept
26  * maintains a bitmask of non-default categories
27  * and a permanent locale namespace for pointer comparison
28  * and persistent private data for locale related functions
29  */
30 
31 #include <ast_standards.h>
32 
33 #include "lclib.h"
34 
35 #include <ast_wchar.h>
36 #include <ctype.h>
37 #include <mc.h>
38 #include <namval.h>
39 
40 #if ( _lib_wcwidth || _lib_wctomb ) && _hdr_wctype
41 #include <wctype.h>
42 #endif
43 
44 #if _lib_wcwidth
45 #undef	wcwidth
46 #else
47 #define wcwidth			0
48 #endif
49 
50 #if _lib_wctomb
51 #undef	wctomb
52 #else
53 #define wctomb			0
54 #endif
55 
56 #ifdef mblen
57 #undef	mblen
58 extern int		mblen(const char*, size_t);
59 #endif
60 
61 #undef	mbtowc
62 #undef	setlocale
63 #undef	strcmp
64 #undef	strcoll
65 #undef	strxfrm
66 #undef	valid
67 
68 #ifndef AST_LC_CANONICAL
69 #define AST_LC_CANONICAL	LC_abbreviated
70 #endif
71 
72 #ifndef AST_LC_test
73 #define AST_LC_test		(1L<<27)
74 #endif
75 
76 #if _UWIN
77 
78 #include <ast_windows.h>
79 
80 #undef	_lib_setlocale
81 #define _lib_setlocale		1
82 
83 #define setlocale(c,l)		native_setlocale(c,l)
84 
85 extern char*			uwin_setlocale(int, const char*);
86 
87 /*
88  * convert locale to native locale name in buf
89  */
90 
91 static char*
92 native_locale(const char* locale, char* buf, size_t siz)
93 {
94 	Lc_t*				lc;
95 	const Lc_attribute_list_t*	ap;
96 	int				i;
97 	unsigned long			lcid;
98 	unsigned long			lang;
99 	unsigned long			ctry;
100 	char				lbuf[128];
101 	char				cbuf[128];
102 
103 	if (locale && *locale)
104 	{
105 		if (!(lc = lcmake(locale)))
106 			return 0;
107 		lang = lc->language->index;
108 		ctry = 0;
109 		for (ap = lc->attributes; ap; ap = ap->next)
110 			if (ctry = ap->attribute->index)
111 				break;
112 		if (!ctry)
113 		{
114 			for (i = 0; i < elementsof(lc->territory->languages); i++)
115 				if (lc->territory->languages[i] == lc->language)
116 				{
117 					ctry = lc->territory->indices[i];
118 					break;
119 				}
120 			if (!ctry)
121 			{
122 				if (!lang)
123 					return 0;
124 				ctry = SUBLANG_DEFAULT;
125 			}
126 		}
127 		lcid = MAKELCID(MAKELANGID(lang, ctry), SORT_DEFAULT);
128 	}
129 	else
130 		lcid = GetUserDefaultLCID();
131 	if (GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, lbuf, sizeof(lbuf)) <= 0 ||
132 	    GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, cbuf, sizeof(cbuf)) <= 0)
133 		return 0;
134 	if (lc->charset->ms)
135 		sfsprintf(buf, siz, "%s_%s.%s", lbuf, cbuf, lc->charset->ms);
136 	else
137 		sfsprintf(buf, siz, "%s_%s", lbuf, cbuf);
138 	return buf;
139 }
140 
141 /*
142  * locale!=0 here
143  */
144 
145 static char*
146 native_setlocale(int category, const char* locale)
147 {
148 	char*		usr;
149 	char*		sys;
150 	char		buf[256];
151 
152 	if (!(usr = native_locale(locale, buf, sizeof(buf))))
153 		return 0;
154 
155 	/*
156 	 * win32 doesn't have LC_MESSAGES
157 	 */
158 
159 	if (category == LC_MESSAGES)
160 		return (char*)locale;
161 	sys = uwin_setlocale(category, usr);
162 	if (ast.locale.set & AST_LC_debug)
163 		sfprintf(sfstderr, "locale uwin %17s %-24s %-24s\n", lc_categories[lcindex(category, 0)].name, usr, sys);
164 	return sys;
165 }
166 
167 #else
168 
169 #define native_locale(a,b,c)	((char*)0)
170 
171 #endif
172 
173 /*
174  * LC_COLLATE and LC_CTYPE native support
175  */
176 
177 #if !_lib_mbtowc || MB_LEN_MAX <= 1
178 #define mblen		0
179 #define mbtowc		0
180 #endif
181 
182 #if !_lib_strcoll
183 #define	strcoll		0
184 #endif
185 
186 #if !_lib_strxfrm
187 #define	strxfrm		0
188 #endif
189 
190 /*
191  * LC_COLLATE and LC_CTYPE debug support
192  *
 * multibyte debug encoding
194  *
195  *	DL0 [ '0' .. '4' ] c1 ... c4 DR0
196  *	DL1 [ '0' .. '4' ] c1 ... c4 DR1
197  *
198  * with these ligatures
199  *
200  *	ch CH sst SST
201  *
202  * and private collation order
203  *
204  * wide character display width is the low order 3 bits
205  * wctomb() uses DL1...DR1
206  */
207 
208 #define DEBUG_MB_CUR_MAX	7
209 
210 #if DEBUG_MB_CUR_MAX < MB_LEN_MAX
211 #undef	DEBUG_MB_CUR_MAX
212 #define DEBUG_MB_CUR_MAX	MB_LEN_MAX
213 #endif
214 
215 #define DL0	'<'
216 #define DL1	0xab		/* 8-bit mini << on xterm	*/
217 #define DR0	'>'
218 #define DR1	0xbb		/* 8-bit mini >> on xterm	*/
219 
220 #define DB	((int)sizeof(wchar_t)*8-1)
221 #define DC	7		/* wchar_t embedded char bits	*/
222 #define DX	(DB/DC)		/* wchar_t max embedded chars	*/
223 #define DZ	(DB-DX*DC+1)	/* wchar_t embedded size bits	*/
224 #define DD	3		/* # mb delimiter chars <n...>	*/
225 
/*
 * private collation order for the debug locale
 * indexed by byte value (0..255); the entry is the collation weight
 * that debug_strxfrm() stores for that byte
 */

static unsigned char debug_order[] =
{
	  0,   1,   2,   3,   4,   5,   6,   7,
	  8,   9,  10,  11,  12,  13,  14,  15,
	 16,  17,  18,  19,  20,  21,  22,  23,
	 24,  25,  26,  27,  28,  29,  30,  31,
	 99, 100, 101, 102,  98, 103, 104, 105,
	106, 107, 108,  43, 109,  44,  42, 110,
	 32,  33,  34,  35,  36,  37,  38,  39,
	 40,  41, 111, 112, 113, 114, 115, 116,
	117,  71,  72,  73,  74,  75,  76,  77,
	 78,  79,  80,  81,  82,  83,  84,  85,
	 86,  87,  88,  89,  90,  91,  92,  93,
	 94,  95,  96, 118, 119, 120, 121,  97,
	122,  45,  46,  47,  48,  49,  50,  51,
	 52,  53,  54,  55,  56,  57,  58,  59,
	 60,  61,  62,  63,  64,  65,  66,  67,
	 68,  69,  70, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151,
	152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167,
	168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183,
	184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215,
	216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231,
	232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247,
	248, 249, 250, 251, 252, 253, 254, 255,
};
261 
262 static int
263 debug_mbtowc(register wchar_t* p, register const char* s, size_t n)
264 {
265 	register const char*	q;
266 	register const char*	r;
267 	register int		w;
268 	register int		dr;
269 	wchar_t			c;
270 
271 	if (n < 1)
272 		return -1;
273 	if (!s || !*s)
274 		return 0;
275 	switch (((unsigned char*)s)[0])
276 	{
277 	case DL0:
278 		dr = DR0;
279 		break;
280 	case DL1:
281 		dr = DR1;
282 		break;
283 	default:
284 		if (p)
285 			*p = ((unsigned char*)s)[0] & ((1<<DC)-1);
286 		return 1;
287 	}
288 	if (n < 2)
289 		return -1;
290 	if ((w = ((unsigned char*)s)[1]) == ((unsigned char*)s)[0])
291 	{
292 		if (p)
293 			*p = w;
294 		return 2;
295 	}
296 	if (w < '0' || w > ('0' + DX))
297 		return -1;
298 	if ((w -= '0' - DD) > n)
299 		return -1;
300 	r = s + w - 1;
301 	q = s += 2;
302 	while (q < r && *q)
303 		q++;
304 	if (q != r || *((unsigned char*)q) != dr)
305 		return -1;
306 	if (p)
307 	{
308 		c = 0;
309 		while (--q >= s)
310 		{
311 			c <<= DC;
312 			c |= *((unsigned char*)q);
313 		}
314 		c <<= DZ;
315 		c |= w - DD;
316 		*p = c;
317 	}
318 	return w;
319 }
320 
321 static int
322 debug_wctomb(char* s, wchar_t c)
323 {
324 	int	w;
325 	int	i;
326 	int	k;
327 
328 	w = 0;
329 	if (c >= 0 && c <= UCHAR_MAX)
330 	{
331 		w++;
332 		if (s)
333 			*s = c;
334 	}
335 	else if ((i = c & ((1<<DZ)-1)) > DX)
336 		return -1;
337 	else
338 	{
339 		w++;
340 		if (s)
341 			*s++ = DL1;
342 		c >>= DZ;
343 		w++;
344 		if (s)
345 			*s++ = i + '0';
346 		while (i--)
347 		{
348 			w++;
349 			if (s)
350 				*s++ = (k = c & ((1<<DC)-1)) ? k : '?';
351 			c >>= DC;
352 		}
353 		w++;
354 		if (s)
355 			*s++ = DR1;
356 	}
357 	return w;
358 }
359 
360 static int
361 debug_mblen(const char* s, size_t n)
362 {
363 	return debug_mbtowc(NiL, s, n);
364 }
365 
366 static int
367 debug_wcwidth(wchar_t c)
368 {
369 	if (c >= 0 && c <= UCHAR_MAX)
370 		return 1;
371 	if ((c &= ((1<<DZ)-1)) > DX)
372 		return -1;
373 	return c + DD;
374 }
375 
376 static size_t
377 debug_strxfrm(register char* t, register const char* s, size_t n)
378 {
379 	register const char*	q;
380 	register const char*	r;
381 	register char*		e;
382 	char*			o;
383 	register size_t		z;
384 	register int		w;
385 
386 	o = t;
387 	z = 0;
388 	if (e = t)
389 		e += n;
390 	while (s[0])
391 	{
392 		if ((((unsigned char*)s)[0] == DL0 || ((unsigned char*)s)[0] == DL1) && (w = s[1]) >= '0' && w <= ('0' + DC))
393 		{
394 			w -= '0';
395 			q = s + 2;
396 			r = q + w;
397 			while (q < r && *q)
398 				q++;
399 			if (*((unsigned char*)q) == DR0 || *((unsigned char*)q) == DR1)
400 			{
401 				if (t)
402 				{
403 					for (q = s + 2; q < r; q++)
404 						if (t < e)
405 							*t++ = debug_order[*q];
406 					while (w++ < DX)
407 						if (t < e)
408 							*t++ = 1;
409 				}
410 				s = r + 1;
411 				z += DX;
412 				continue;
413 			}
414 		}
415 		if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'h' || s[1] == 'H'))
416 		{
417 			if (t)
418 			{
419 				if (t < e)
420 					*t++ = debug_order[s[0]];
421 				if (t < e)
422 					*t++ = debug_order[s[1]];
423 				if (t < e)
424 					*t++ = 1;
425 				if (t < e)
426 					*t++ = 1;
427 			}
428 			s += 2;
429 			z += DX;
430 			continue;
431 		}
432 		if ((s[0] == 's' || s[0] == 'S') && (s[1] == 's' || s[1] == 'S') && (s[2] == 't' || s[2] == 'T'))
433 		{
434 			if (t)
435 			{
436 				if (t < e)
437 					*t++ = debug_order[s[0]];
438 				if (t < e)
439 					*t++ = debug_order[s[1]];
440 				if (t < e)
441 					*t++ = debug_order[s[2]];
442 				if (t < e)
443 					*t++ = 1;
444 			}
445 			s += 3;
446 			z += DX;
447 			continue;
448 		}
449 		if (t)
450 		{
451 			if (t < e)
452 				*t++ = debug_order[s[0]];
453 			if (t < e)
454 				*t++ = 1;
455 			if (t < e)
456 				*t++ = 1;
457 			if (t < e)
458 				*t++ = 1;
459 		}
460 		s++;
461 		z += DX;
462 	}
463 	if (!t)
464 		return z;
465 	if (t < e)
466 		*t = 0;
467 	return t - o;
468 }
469 
/*
 * debug locale strcoll()
 *
 * Compares the debug_strxfrm() keys of a and b with strcmp().
 * Keys are limited to the fixed 1024 byte buffers.
 */

static int
debug_strcoll(const char* a, const char* b)
{
	char	xa[1024];
	char	xb[1024];

	debug_strxfrm(xa, a, sizeof(xa) - 1);
	debug_strxfrm(xb, b, sizeof(xb) - 1);

	/* the transform only terminates when there is room */
	xa[sizeof(xa)-1] = 0;
	xb[sizeof(xb)-1] = 0;
	return strcmp(xa, xb);
}
482 
483 /*
484  * default locale
485  */
486 
/*
 * default locale wcwidth()
 *
 * 1 for values 0..255 that are not control characters, else -1
 */

static int
default_wcwidth(wchar_t w)
{
	if (w < 0 || w > 255)
		return -1;
	if (iscntrl(w))
		return -1;
	return 1;
}
492 
493 /*
494  * called when LC_COLLATE initialized or changes
495  */
496 
497 static int
498 set_collate(Lc_category_t* cp)
499 {
500 	if (locales[cp->internal]->flags & LC_debug)
501 	{
502 		ast.collate = debug_strcoll;
503 		ast.mb_xfrm = debug_strxfrm;
504 	}
505 	else if (locales[cp->internal]->flags & LC_default)
506 	{
507 		ast.collate = strcmp;
508 		ast.mb_xfrm = 0;
509 	}
510 	else
511 	{
512 		ast.collate = strcoll;
513 		ast.mb_xfrm = strxfrm;
514 	}
515 	return 0;
516 }
517 
518 /*
519  * workaround the interesting sjis that translates unshifted 7 bit ascii!
520  */
521 
522 #if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc
523 
524 #define mb_state_zero	((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
525 #define mb_state	((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
526 
527 static int
528 sjis_mbtowc(register wchar_t* p, register const char* s, size_t n)
529 {
530 	if (n && p && s && (*s == '\\' || *s == '~') && !memcmp(mb_state, mb_state_zero, sizeof(mbstate_t)))
531 	{
532 		*p = *s;
533 		return 1;
534 	}
535 	return mbrtowc(p, s, n, mb_state);
536 }
537 
538 #endif
539 
540 #define utf8_wctomb	wctomb
541 
/*
 * indexed by utf8 sequence length
 * utf8_mbtowc() rejects a decoded value that has none of the bits
 * in utf8mask[length] set, i.e. one that would have fit a shorter
 * sequence
 */

static const uint32_t		utf8mask[] =
{
	0x00000000,
	0x00000000,
	0xffffff80,
	0xfffff800,
	0xffff0000,
	0xffe00000,
	0xfc000000,
};

/*
 * indexed by the first byte of a sequence
 * >0 is the sequence length that utf8_mbtowc() expects;
 * 0 and -1 entries are not accepted as a first byte
 * (the 0 entry for byte 0 makes utf8_mbtowc() return 0)
 */

static const signed char	utf8tab[256] =
{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6,-1,-1,
};
572 
573 static int
574 utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
575 {
576 	register unsigned char*	sp = (unsigned char*)str;
577 	register int		m;
578 	register int		i;
579 	register int		c;
580 	register wchar_t	w = 0;
581 
582 	if (!sp || !n)
583 		return 0;
584 	if ((m = utf8tab[*sp]) > 0)
585 	{
586 		if (m > n)
587 			return -1;
588 		if (wp)
589 		{
590 			if (m == 1)
591 			{
592 				*wp = *sp;
593 				return 1;
594 			}
595 			w = *sp & ((1<<(8-m))-1);
596 			for (i = m - 1; i > 0; i--)
597 			{
598 				c = *++sp;
599 				if ((c&0xc0) != 0x80)
600 					goto invalid;
601 				w = (w<<6) | (c&0x3f);
602 			}
603 			if (!(utf8mask[m] & w) || w >= 0xd800 && (w <= 0xdfff || w >= 0xfffe && w <= 0xffff))
604 				goto invalid;
605 			*wp = w;
606 		}
607 		return m;
608 	}
609 	if (!*sp)
610 		return 0;
611  invalid:
612 #ifdef EILSEQ
613 	errno = EILSEQ;
614 #endif
615 	ast.mb_sync = (const char*)sp - str;
616 	return -1;
617 }
618 
/*
 * utf8 mblen(): utf8_mbtowc() into a scratch character so that
 * the whole sequence is validated
 */

static int
utf8_mblen(const char* str, size_t n)
{
	wchar_t		scratch;
	int		len;

	len = utf8_mbtowc(&scratch, str, n);
	return len;
}
626 
627 /*
628  * called when LC_CTYPE initialized or changes
629  */
630 
/*
 * LC_CTYPE hook: install the multibyte functions in ast
 *
 * Selects one of four configurations from the flags of the locale
 * now in effect for the category: debug, single byte default,
 * builtin utf8, or the native library functions.  Always returns 0.
 *
 * NOTE: the second test assigns ast.mb_cur_max, ast.mb_len and
 * ast.mb_towc as a side effect; the later branches rely on the
 * values left there, so the order of the tests matters.
 */

static int
set_ctype(Lc_category_t* cp)
{
	ast.mb_sync = 0;
	if (locales[cp->internal]->flags & LC_debug)
	{
		ast.mb_cur_max = DEBUG_MB_CUR_MAX;
		ast.mb_len = debug_mblen;
		ast.mb_towc = debug_mbtowc;
		ast.mb_width = debug_wcwidth;
		ast.mb_conv = debug_wctomb;
	}
	/* default locale, single byte native locale, or no native mblen/mbtowc (they may be defined as 0 above) */
	else if ((locales[cp->internal]->flags & LC_default) || (ast.mb_cur_max = MB_CUR_MAX) <= 1 || !(ast.mb_len = mblen) || !(ast.mb_towc = mbtowc))
	{
		ast.mb_cur_max = 1;
		ast.mb_len = 0;
		ast.mb_towc = 0;
		ast.mb_width = default_wcwidth;
		ast.mb_conv = 0;
	}
	/* utf8 uses the builtin decoder unless the "test" option is set */
	else if ((locales[cp->internal]->flags & LC_utf8) && !(ast.locale.set & AST_LC_test))
	{
		ast.mb_cur_max = 6;
		ast.mb_len = utf8_mblen;
		ast.mb_towc = utf8_mbtowc;
		if (!(ast.mb_width = wcwidth))
			ast.mb_width = default_wcwidth;
		ast.mb_conv = utf8_wctomb;
	}
	else
	{
		/* native multibyte: mb_cur_max, mb_len and mb_towc were set by the test above */
		if (!(ast.mb_width = wcwidth))
			ast.mb_width = default_wcwidth;
		ast.mb_conv = wctomb;
#ifdef mb_state
		{
			/*
			 * check for sjis that translates unshifted 7 bit ascii!
			 */

			char*	s;
			char	buf[2];

			mbinit();
			buf[1] = 0;
			*(s = buf) = '\\';
			/* a backslash that does not convert to itself selects the sjis workaround */
			if (mbchar(s) != buf[0])
			{
				memcpy(mb_state, mb_state_zero, sizeof(mbstate_t));
				ast.mb_towc = sjis_mbtowc;
			}
		}
#endif
	}
	/* trace the selection when the debug or setlocale option is set */
	if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
		sfprintf(sfstderr, "locale info %17s MB_CUR_MAX=%d%s%s%s%s\n"
			, cp->name
			, ast.mb_cur_max
			, ast.mb_len == debug_mblen ? " debug_mblen" : ast.mb_len == mblen ? " mblen" : ""
			, ast.mb_towc == debug_mbtowc ? " debug_mbtowc" : ast.mb_towc == mbtowc ? " mbtowc"
#ifdef mb_state
				: ast.mb_towc == sjis_mbtowc ? " sjis_mbtowc"
#endif
				: ""
			, ast.mb_width == debug_wcwidth ? " debug_wcwidth" : ast.mb_width == wcwidth ? " wcwidth" : ast.mb_width == default_wcwidth ? " default_wcwidth" : ""
			, ast.mb_conv == debug_wctomb ? " debug_wctomb" : ast.mb_conv == wctomb ? " wctomb" : ""
			);
	return 0;
}
700 
701 /*
702  * called when LC_NUMERIC initialized or changes
703  */
704 
705 static int
706 set_numeric(Lc_category_t* cp)
707 {
708 	register int		category = cp->internal;
709 	struct lconv*		lp;
710 	Lc_numeric_t*		dp;
711 
712 	static Lc_numeric_t	default_numeric = { '.', -1 };
713 
714 	if (!LCINFO(category)->data)
715 	{
716 		if ((lp = localeconv()) && (dp = newof(0, Lc_numeric_t, 1, 0)))
717 		{
718 			dp->decimal = lp->decimal_point && *lp->decimal_point ? *(unsigned char*)lp->decimal_point : '.';
719 			dp->thousand = lp->thousands_sep && *lp->thousands_sep ? *(unsigned char*)lp->thousands_sep : -1;
720 		}
721 		else
722 			dp = &default_numeric;
723 		LCINFO(category)->data = (void*)dp;
724 		if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
725 			sfprintf(sfstderr, "locale info %17s decimal '%c' thousands '%c'\n", lc_categories[category].name, dp->decimal, dp->thousand >= 0 ? dp->thousand : 'X');
726 	}
727 	return 0;
728 }
729 
730 /*
731  * this table is indexed by AST_LC_[A-Z]*
732  */
733 
/*
 * one entry per locale category
 * each row lists the category name, the LC_* value, the AST_LC_*
 * value and the function called when the category changes (0 if none)
 * NOTE(review): the member each column initializes is fixed by the
 * Lc_category_t declaration, which is not in this file -- presumably
 * name, external, internal, setf
 */

Lc_category_t		lc_categories[] =
{
{ "LC_ALL",           LC_ALL,           AST_LC_ALL,           0               },
{ "LC_COLLATE",       LC_COLLATE,       AST_LC_COLLATE,       set_collate     },
{ "LC_CTYPE",         LC_CTYPE,         AST_LC_CTYPE,         set_ctype       },
{ "LC_MESSAGES",      LC_MESSAGES,      AST_LC_MESSAGES,      0               },
{ "LC_MONETARY",      LC_MONETARY,      AST_LC_MONETARY,      0               },
{ "LC_NUMERIC",       LC_NUMERIC,       AST_LC_NUMERIC,       set_numeric     },
{ "LC_TIME",          LC_TIME,          AST_LC_TIME,          0               },
{ "LC_IDENTIFICATION",LC_IDENTIFICATION,AST_LC_IDENTIFICATION,0               },
{ "LC_ADDRESS",       LC_ADDRESS,       AST_LC_ADDRESS,       0               },
{ "LC_NAME",          LC_NAME,          AST_LC_NAME,          0               },
{ "LC_TELEPHONE",     LC_TELEPHONE,     AST_LC_TELEPHONE,     0               },
{ "LC_XLITERATE",     LC_XLITERATE,     AST_LC_XLITERATE,     0               },
{ "LC_MEASUREMENT",   LC_MEASUREMENT,   AST_LC_MEASUREMENT,   0               },
{ "LC_PAPER",         LC_PAPER,         AST_LC_PAPER,         0               },
};
751 
/*
 * locales last selected through LANG and LC_ALL (0 if none)
 */

static Lc_t*		lang;
static Lc_t*		lc_all;

/*
 * option name => ast.locale.set bit
 */

typedef struct Unamval_s
{
	char*		name;
	unsigned int	value;
} Unamval_t;

/*
 * options accepted in the LC_OPTIONS environment variable
 * the list is terminated by a 0 name
 */

static const Unamval_t	options[] =
{
	"debug",		AST_LC_debug,
	"find",			AST_LC_find,
	"setlocale",		AST_LC_setlocale,
	"test",			AST_LC_test,
	"translate",		AST_LC_translate,
	0,			0
};
770 
771 /*
772  * called by stropt() to set options
773  */
774 
775 static int
776 setopt(void* a, const void* p, int n, const char* v)
777 {
778 	if (p)
779 	{
780 		if (n)
781 			ast.locale.set |= ((Unamval_t*)p)->value;
782 		else
783 			ast.locale.set &= ~((Unamval_t*)p)->value;
784 	}
785 	return 0;
786 }
787 
788 #if !_lib_setlocale
789 
790 #define setlocale(c,l)		default_setlocale(c,l)
791 
792 static char*
793 default_setlocale(int category, const char* locale)
794 {
795 	Lc_t*		lc;
796 
797 	if (locale)
798 	{
799 		if (!(lc = lcmake(locale)) || !(lc->flags & LC_default))
800 			return 0;
801 		locales[0]->flags &= ~lc->flags;
802 		locales[1]->flags &= ~lc->flags;
803 		return lc->name;
804 	}
805 	return (locales[1]->flags & (1<<category)) ? locales[1]->name : locales[0]->name;
806 }
807 
808 #endif
809 
810 /*
811  * set a single AST_LC_* locale category
812  * the caller must validate category
813  * lc==0 restores the previous state
814  */
815 
/*
 * Set one AST_LC_* category to lc.
 *
 * category	AST_LC_* index, already validated by the caller
 * lc		locale to install; 0 selects a fallback (see below)
 * flags	LC_setenv and/or LC_setlocale, or 0
 *
 * Returns the name of the locale now in effect, or 0 if the locale
 * is neither accepted by the native setlocale() nor locally
 * overridden, or if the category's set function fails.
 */

static char*
single(int category, Lc_t* lc, unsigned int flags)
{
	const char*	sys;
	int		i;

	if (flags & (LC_setenv|LC_setlocale))
	{
		/* remember the request unless this is an internal call */
		if (!(ast.locale.set & AST_LC_internal))
			lc_categories[category].prev = lc;
		/* an LC_ALL in effect takes precedence over a setenv request */
		if ((flags & LC_setenv) && lc_all && locales[category])
			return (char*)locales[category]->name;
	}
	/*
	 * lc==0 fallback order: the remembered locale if the category was
	 * set by setlocale(), then lc_all, the remembered locale, lang,
	 * and finally lcmake(NiL)
	 */
	if (!lc && (!(lc_categories[category].flags & LC_setlocale) || !(lc = lc_categories[category].prev)) && !(lc = lc_all) && !(lc = lc_categories[category].prev) && !(lc = lang))
		lc = lcmake(NiL);
	sys = 0;
	if (locales[category] != lc)
	{
		/* NOTE(review): external == -internal presumably marks categories with no native counterpart -- confirm in lclib.h */
		if (lc_categories[category].external == -lc_categories[category].internal)
		{
			/* accepted only if some other category already uses lc */
			for (i = 1; i < AST_LC_COUNT; i++)
				if (locales[i] == lc)
				{
					sys = (char*)lc->name;
					break;
				}
		}
		else if (lc->flags & (LC_debug|LC_local))
			sys = setlocale(lc_categories[category].external, lcmake(NiL)->name);
		/* try the name, then the code (if different), then the language code (if different) */
		else if (!(sys = setlocale(lc_categories[category].external, lc->name)) &&
			 (streq(lc->name, lc->code) || !(sys = setlocale(lc_categories[category].external, lc->code))) &&
			 !streq(lc->code, lc->language->code))
				sys = setlocale(lc_categories[category].external, lc->language->code);
		if (!sys)
		{
			/*
			 * check for local override
			 * currently this means an LC_MESSAGES dir exists
			 */

			if (!(lc->flags & LC_checked))
			{
				char	path[PATH_MAX];

				if (mcfind(path, lc->code, NiL, LC_MESSAGES, 0))
					lc->flags |= LC_local;
				lc->flags |= LC_checked;
			}
			if (!(lc->flags & LC_local))
				return 0;
			/* local override: the native category is set to the lcmake(NiL) locale */
			if (lc_categories[category].external != -lc_categories[category].internal)
				setlocale(lc_categories[category].external, lcmake(NiL)->name);
		}
		locales[category] = lc;
		/* a failing set function puts the remembered locale back */
		if (lc_categories[category].setf && (*lc_categories[category].setf)(&lc_categories[category]))
		{
			locales[category] = lc_categories[category].prev;
			return 0;
		}
		/* maintain the non-default category bit; default locales and en/en_U* messages clear it */
		if ((lc->flags & LC_default) || category == AST_LC_MESSAGES && lc->name[0] == 'e' && lc->name[1] == 'n' && (lc->name[2] == 0 || lc->name[2] == '_' && lc->name[3] == 'U'))
			ast.locale.set &= ~(1<<category);
		else
			ast.locale.set |= (1<<category);
	}
	/* same locale: only the setenv/setlocale origin flags may need updating */
	else if (lc_categories[category].flags ^ flags)
	{
		lc_categories[category].flags &= ~(LC_setenv|LC_setlocale);
		lc_categories[category].flags |= flags;
	}
	else
		return (char*)lc->name;
	if ((ast.locale.set & (AST_LC_debug|AST_LC_setlocale)) && !(ast.locale.set & AST_LC_internal))
		sfprintf(sfstderr, "locale set  %17s %16s %16s %16s %s%s\n", lc_categories[category].name, lc->name, sys, lc_categories[category].prev ? lc_categories[category].prev->name : NiL, (lc_categories[category].flags & LC_setlocale) ? "[setlocale]" : "", (lc_categories[category].flags & LC_setenv) ? "[setenv]" : "");
	return (char*)lc->name;
}
891 
892 /*
893  * set composite AST_LC_ALL locale categories
894  * return <0:composite-error 0:not-composite >0:composite-ok
895  */
896 
897 static int
898 composite(register const char* s, int initialize)
899 {
900 	register const char*	t;
901 	register int		i;
902 	register int		j;
903 	register int		k;
904 	int			n;
905 	int			m;
906 	const char*		w;
907 	Lc_t*			p;
908 	int			cat[AST_LC_COUNT];
909 	int			stk[AST_LC_COUNT];
910 	char			buf[PATH_MAX / 2];
911 
912 	k = n = 0;
913 	while (s[0] == 'L' && s[1] == 'C' && s[2] == '_')
914 	{
915 		n++;
916 		j = 0;
917 		w = s;
918 		for (i = 1; i < AST_LC_COUNT; i++)
919 		{
920 			s = w;
921 			t = lc_categories[i].name;
922 			while (*t && *s++ == *t++);
923 			if (!*t && *s++ == '=')
924 			{
925 				cat[j++] = i;
926 				if (s[0] != 'L' || s[1] != 'C' || s[2] != '_')
927 					break;
928 				w = s;
929 				i = -1;
930 			}
931 		}
932 		for (s = w; *s && *s != '='; s++);
933 		if (!*s)
934 		{
935 			for (i = 0; i < k; i++)
936 				single(stk[i], NiL, 0);
937 			return -1;
938 		}
939 		w = ++s;
940 		for (;;)
941 		{
942 			if (!*s)
943 			{
944 				p = lcmake(w);
945 				break;
946 			}
947 			else if (*s++ == ';')
948 			{
949 				if ((m = s - w - 1) >= sizeof(buf))
950 					m = sizeof(buf) - 1;
951 				memcpy(buf, w, m);
952 				buf[m] = 0;
953 				p = lcmake(buf);
954 				break;
955 			}
956 		}
957 		for (i = 0; i < j; i++)
958 			if (!initialize)
959 			{
960 				if (!single(cat[i], p, 0))
961 				{
962 					for (i = 0; i < k; i++)
963 						single(stk[i], NiL, 0);
964 					return -1;
965 				}
966 				stk[k++] = cat[i];
967 			}
968 			else if (!lc_categories[cat[i]].prev && !(ast.locale.set & AST_LC_internal))
969 				lc_categories[cat[i]].prev = p;
970 	}
971 	while (s[0] == '/' && s[1] && n < (AST_LC_COUNT - 1))
972 	{
973 		n++;
974 		for (w = ++s; *s && *s != '/'; s++);
975 		if (!*s)
976 			p = lcmake(w);
977 		else
978 		{
979 			if ((j = s - w - 1) >= sizeof(buf))
980 				j = sizeof(buf) - 1;
981 			memcpy(buf, w, j);
982 			buf[j] = 0;
983 			p = lcmake(buf);
984 		}
985 		if (!initialize)
986 		{
987 			if (!single(n, p, 0))
988 			{
989 				for (i = 1; i < n; i++)
990 					single(i, NiL, 0);
991 				return -1;
992 			}
993 		}
994 		else if (!lc_categories[n].prev && !(ast.locale.set & AST_LC_internal))
995 			lc_categories[n].prev = p;
996 	}
997 	return n;
998 }
999 
1000 /*
1001  * setlocale() intercept
1002  *
1003  * locale:
1004  *	0	query
1005  *	""	initialize from environment (if LC_ALL)
1006  *	""	AST_LC_setenv: value unset (defer to LANG)
1007  *	"*"	AST_LC_setenv: value set (defer to LC_ALL)
1008  *	*	set (override LC_ALL)
1009  */
1010 
/*
 * category	external LC_* category; mapped to an AST_LC_* index
 * locale	see the table above
 *
 * Returns the name of the locale in effect for the category.  For
 * LC_ALL/LANG this is either a single name, when all categories
 * share one locale, or a "LC_x=...;LC_y=..." composite built in a
 * static string stream, so the result is only valid until the next
 * call that composes one.  Returns 0 for an invalid category or if
 * the locale cannot be set.
 */

char*
_ast_setlocale(int category, const char* locale)
{
	register char*		s;
	register int		i;
	register int		j;
	int			k;
	int			f;
	Lc_t*			p;
	int			cat[AST_LC_COUNT];

	static Sfio_t*		sp;
	static int		initialized;
	static const char	local[] = "local";

	if ((category = lcindex(category, 0)) < 0)
		return 0;
	if (!locale)
	{
		/*
		 * return the current state
		 */

	compose:
		if (category != AST_LC_ALL && category != AST_LC_LANG)
			return (char*)locales[category]->name;
		if (!sp && !(sp = sfstropen()))
			return 0;
		/* group the categories: cat[i] is the first category with the same locale as i */
		for (i = 1; i < AST_LC_COUNT; i++)
			cat[i] = -1;
		for (i = 1, k = 0; i < AST_LC_COUNT; i++)
			if (cat[i] < 0)
			{
				k++;
				cat[i] = i;
				for (j = i + 1; j < AST_LC_COUNT; j++)
					if (locales[j] == locales[i])
						cat[j] = i;
			}
		/* one group: every category has the same locale */
		if (k == 1)
			return (char*)locales[1]->name;
		/* emit "LC_a=LC_b=name" for each group with a non-default locale, separated by ';' */
		for (i = 1; i < AST_LC_COUNT; i++)
			if (cat[i] >= 0 && !(locales[i]->flags & LC_default))
			{
				if (sfstrtell(sp))
					sfprintf(sp, ";");
				for (j = i, k = cat[i]; j < AST_LC_COUNT; j++)
					if (cat[j] == k)
					{
						cat[j] = -1;
						sfprintf(sp, "%s=", lc_categories[j].name);
					}
				sfprintf(sp, "%s", locales[i]->name);
			}
		if (!sfstrtell(sp))
			return (char*)locales[0]->name;
		return sfstruse(sp);
	}
	/* first non-query call: read the LC_OPTIONS environment variable */
	if (!ast.locale.serial++)
	{
		stropt(getenv("LC_OPTIONS"), options, sizeof(*options), setopt, NiL);
		initialized = 0;
	}
	if ((ast.locale.set & (AST_LC_debug|AST_LC_setlocale)) && !(ast.locale.set & AST_LC_internal))
		sfprintf(sfstderr, "locale user %17s %16s  %s%s\n", category == AST_LC_LANG ? "LANG" : lc_categories[category].name, locale && !*locale ? "''" : locale, initialized ? "" : "[initial]", (ast.locale.set & AST_LC_setenv) ? "[setenv]" : "");
	/*
	 * determine the target locale p and the origin flag f
	 */
	if (ast.locale.set & AST_LC_setenv)
	{
		f = LC_setenv;
		p = *locale ? lcmake(locale) : (Lc_t*)0;
	}
	else if (*locale)
	{
		f = LC_setlocale;
		p = lcmake(locale);
	}
	else if (category == AST_LC_ALL)
	{
		if (!initialized)
		{
			char*	u;
			char	tmp[256];

			/*
			 * initialize from the environment
			 * precedence determined by X/Open
			 */

			/* u caches the native locale name used for the value "local"; native_locale() is (char*)0 unless _UWIN */
			u = 0;
			if ((s = getenv("LANG")) && *s)
			{
				if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
					s = u;
				lang = lcmake(s);
			}
			else
				lang = 0;
			if ((s = getenv("LC_ALL")) && *s)
			{
				if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
					s = u;
				lc_all = lcmake(s);
			}
			else
				lc_all = 0;
			for (i = 1; i < AST_LC_COUNT; i++)
				if (lc_categories[i].flags & LC_setlocale)
					/* explicitly set by setlocale() */;
				else if ((s = getenv(lc_categories[i].name)) && *s)
				{
					if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
						s = u;
					lc_categories[i].prev = lcmake(s);
				}
				else
					lc_categories[i].prev = 0;
			/* apply: LC_ALL overrides everything not explicitly set by setlocale() */
			for (i = 1; i < AST_LC_COUNT; i++)
				if (!single(i, lc_all && !(lc_categories[i].flags & LC_setlocale) ? lc_all : lc_categories[i].prev, 0))
				{
					/* a failure restores the categories below i, down to and including index 0 */
					while (i--)
						single(i, NiL, 0);
					return 0;
				}
			if (ast.locale.set & AST_LC_debug)
				for (i = 1; i < AST_LC_COUNT; i++)
					sfprintf(sfstderr, "locale env  %17s %16s %16s %16s\n", lc_categories[i].name, locales[i]->name, "", lc_categories[i].prev ? lc_categories[i].prev->name : (char*)0);
			initialized = 1;
		}
		goto compose;
	}
	/* "" for LANG, or for a category with no remembered locale, means "C" */
	else if (category == AST_LC_LANG || !(p = lc_categories[category].prev))
	{
		f = 0;
		p = lcmake("C");
	}
	else
		f = 0;
	/*
	 * apply p
	 */
	if (category == AST_LC_LANG)
	{
		if (lang != p)
		{
			lang = p;
			/* LANG only matters when LC_ALL is not in effect */
			if (!lc_all)
				for (i = 1; i < AST_LC_COUNT; i++)
					if (!single(i, lc_categories[i].prev, 0))
					{
						while (i--)
							single(i, NiL, 0);
						return 0;
					}
		}
	}
	else if (category != AST_LC_ALL)
	{
		if (f || !lc_all)
			return single(category, p, f);
		/* LC_ALL is in effect: only remember the request */
		if (p && !(ast.locale.set & AST_LC_internal))
			lc_categories[category].prev = p;
		return (char*)locales[category]->name;
	}
	/* LC_ALL: composite() applies any per-category parts of the value; <0 is an error */
	else if (composite(locale, 0) < 0)
		return 0;
	else if (lc_all != p)
	{
		lc_all = p;
		for (i = 1; i < AST_LC_COUNT; i++)
			if (!single(i, lc_all && !(lc_categories[i].flags & LC_setlocale) ? lc_all : lc_categories[i].prev, 0))
			{
				while (i--)
					single(i, NiL, 0);
				return 0;
			}
	}
	goto compose;
}
1185