xref: /illumos-gate/usr/src/contrib/ast/src/lib/libast/regex/regcoll.c (revision ceab728f83b0af9260d2d3fb69014f3781af2101)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * regex collation symbol support
25  */
26 
27 #include "reglib.h"
28 
29 /*
30  * return the collating symbol delimited by [c c], where c is either '=' or '.'
31  * s points to the first char after the initial [
32  * if e!=0 it is set to point to the next char in s on return
33  *
34  * the collating symbol is converted to multibyte in <buf,size>
35  * the return value is:
36  *	-1	syntax error / invalid collating element
37  *	>=0	size with 0-terminated mb character (*wc != 0)
38  *		or collating element (*wc == 0) in buf
39  */
40 
int
regcollate(register const char* s, char** e, char* buf, size_t size, wchar_t* wc)
{
	const char*			src;	/* start of the bytes to hand back in buf */
	char*				out;	/* write cursor into buf */
	char*				limit;	/* out stops being written once it reaches here */
	int				ch;
	int				n;
	int				len;	/* value returned on success */
	int				delim;	/* '.' or '=' */
	wchar_t				wide;
	char				xfm[256];
	char				probe[sizeof(xfm)];

	/*
	 * need room for at least one byte plus terminator, a valid
	 * delimiter, and a non-empty body (not "[..]" / "[==]")
	 */

	if (size < 2)
		goto nope;
	delim = *s;
	if (delim != '.' && delim != '=')
		goto nope;
	s++;
	if (!*s)
		goto nope;
	if (*s == delim && *(s + 1) == ']')
		goto nope;

	/*
	 * first try a single character; mbchar() is expected to advance s
	 * past what it decodes, so the byte count is the pointer difference
	 */

	src = s;
	wide = mbchar(s);
	len = s - src;
	if (len > 1)
	{
		/* multibyte character: delim ']' must follow immediately */
		if (*s++ != delim || *s++ != ']')
			goto oops;
		goto done;
	}
	if (*s == delim && *(s + 1) == ']')
	{
		/* single byte character followed by the closing pair */
		s += 2;
		goto done;
	}

	/*
	 * multi-character collating element: rescan from the start, copying
	 * into buf; a doubled delim stands for one literal delim, and
	 * delim ']' ends the element
	 */

	out = buf;
	limit = buf + size - 2;
	s = src;
	for (;;)
	{
		ch = *s++;
		if (!ch)
			goto oops;
		if (ch == delim)
		{
			ch = *s++;
			if (!ch)
				goto oops;
			if (ch == ']')
				break;
			if (ch != delim)
				goto oops;
		}
		if (out < limit)
			*out++ = ch;
	}

	/* source bytes between the opening and closing delimiter pairs */
	len = s - src - 2;
	wide = 0;
	if (out >= limit)
		goto done;	/* buf filled up: skip the check below */
	*out = 0;

	/*
	 * build a string of '0' characters of the same length (capped to fit
	 * probe) and compare the mbxfrm() results of the two strings
	 * NOTE(review): this looks like a validity heuristic for the
	 * collating element -- confirm against the mbxfrm() definition
	 */

	n = 0;
	while (n < len && n < sizeof(probe) - 1)
		probe[n++] = '0';
	probe[n] = 0;
	if (mbxfrm(xfm, buf, sizeof(xfm)) >= mbxfrm(xfm, probe, sizeof(xfm)))
		goto nope;
	src = (const char*)buf;	/* buf already holds the result */
 done:
	/* copy only if the bytes are not in buf yet and they fit */
	if (len <= size && (char*)src != buf)
	{
		memcpy(buf, src, len);
		if (len < size)
			buf[len] = 0;
	}
	if (wc)
		*wc = wide;
	if (e)
		*e = (char*)s;
	return len;
 oops:
	/* back up to the offending character */
	s--;
 nope:
	if (e)
		*e = (char*)s;
	return -1;
}
121