xref: /illumos-gate/usr/src/contrib/ast/src/lib/libast/regex/regcoll.c (revision b30d193948be5a7794d7ae3ba0ed9c2f72c88e0f)
1*b30d1939SAndy Fiddaman /***********************************************************************
2*b30d1939SAndy Fiddaman *                                                                      *
3*b30d1939SAndy Fiddaman *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5*b30d1939SAndy Fiddaman *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
7*b30d1939SAndy Fiddaman *                    by AT&T Intellectual Property                     *
8*b30d1939SAndy Fiddaman *                                                                      *
9*b30d1939SAndy Fiddaman *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*b30d1939SAndy Fiddaman *                                                                      *
13*b30d1939SAndy Fiddaman *              Information and Software Systems Research               *
14*b30d1939SAndy Fiddaman *                            AT&T Research                             *
15*b30d1939SAndy Fiddaman *                           Florham Park NJ                            *
16*b30d1939SAndy Fiddaman *                                                                      *
17*b30d1939SAndy Fiddaman *                 Glenn Fowler <gsf@research.att.com>                  *
18*b30d1939SAndy Fiddaman *                  David Korn <dgk@research.att.com>                   *
19*b30d1939SAndy Fiddaman *                   Phong Vo <kpv@research.att.com>                    *
20*b30d1939SAndy Fiddaman *                                                                      *
21*b30d1939SAndy Fiddaman ***********************************************************************/
22*b30d1939SAndy Fiddaman #pragma prototyped
23*b30d1939SAndy Fiddaman /*
24*b30d1939SAndy Fiddaman  * regex collation symbol support
25*b30d1939SAndy Fiddaman  */
26*b30d1939SAndy Fiddaman 
27*b30d1939SAndy Fiddaman #include "reglib.h"
28*b30d1939SAndy Fiddaman 
/*
 * parse one bracket expression collating item: [.name.] or [=name=]
 *
 * s addresses the delimiter ('.' or '=') that follows the opening [
 * when e!=0, *e receives the position at which scanning stopped,
 * whether the call succeeds or fails
 *
 * name is either a single character as decoded by mbchar() or a
 * longer collating element name; inside a longer name a doubled
 * delimiter stands for one literal delimiter
 *
 * on success buf (capacity size bytes) holds the name and, when
 * wc!=0, *wc holds the mbchar() value for a single character or 0
 * for a collating element name; buf is 0-terminated only when
 * there is room for the terminator
 *
 * the return value is:
 *	-1	size<2, missing or wrong delimiter, empty name,
 *		unterminated item, or an element name rejected by
 *		the mbxfrm() comparison below
 *	>=0	number of bytes the name occupies in s
 */

int
regcollate(register const char* s, char** e, char* buf, size_t size, wchar_t* wc)
{
	register int			ch;
	register char*			dst;
	register char*			lim;
	const char*			start;
	int				n;
	int				len;
	int				term;
	wchar_t				wide;
	char				key[256];
	char				ref[sizeof(key)];

	/* reject: no room in buf, no delimiter, or nothing between the delimiters */
	if (size < 2)
		goto fail;
	term = *s;
	if (term != '.' && term != '=')
		goto fail;
	s++;
	if (!*s)
		goto fail;
	if (*s == term && *(s + 1) == ']')
		goto fail;

	/* try the name as one character first */
	start = s;
	wide = mbchar(s);
	len = s - start;
	if (len > 1)
	{
		/* more than one byte consumed: the closing delimiter and ] must follow */
		if (*s++ != term)
			goto backup;
		if (*s++ != ']')
			goto backup;
		goto finish;
	}
	if (*s == term && *(s + 1) == ']')
	{
		/* single byte character */
		s += 2;
		goto finish;
	}

	/*
	 * collating element name: rescan from the beginning and copy
	 * into buf, silently dropping whatever does not fit below lim
	 */
	dst = buf;
	lim = buf + size - 2;
	s = start;
	while (1)
	{
		ch = *s++;
		if (!ch)
			goto backup;
		if (ch == term)
		{
			ch = *s++;
			if (!ch)
				goto backup;
			if (ch == ']')
				break;
			if (ch != term)
				goto backup;
			/* doubled delimiter: keep one copy */
		}
		if (dst < lim)
			*dst++ = ch;
	}

	/* bytes between the delimiters as written in s, doubled delimiters included */
	len = s - start - 2;
	wide = 0;

	/* copy reached lim: skip the check and let finish copy the raw name if it fits */
	if (dst >= lim)
		goto finish;
	*dst = 0;

	/*
	 * accept the name only when its mbxfrm() result is smaller than
	 * the result for a reference string of '0' characters of the
	 * same length (capped by the size of ref)
	 */
	n = 0;
	while (n < len && n < sizeof(ref) - 1)
		ref[n++] = '0';
	ref[n] = 0;
	if (mbxfrm(key, buf, sizeof(key)) >= mbxfrm(key, ref, sizeof(key)))
		goto fail;

	/* buf already holds the name: make finish skip the copy */
	start = (const char*)buf;
 finish:
	if (len <= size && (char*)start != buf)
	{
		memcpy(buf, start, len);
		if (len < size)
			buf[len] = 0;
	}
	if (wc)
		*wc = wide;
	if (e)
		*e = (char*)s;
	return len;
 backup:
	/* step back onto the byte that caused the failure */
	s--;
 fail:
	if (e)
		*e = (char*)s;
	return -1;
}
121