xref: /titanic_51/usr/src/lib/libast/common/string/tokscan.c (revision 29493bd8e037cbaea9095b34172305abb589cb6b)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * scan s for tokens in fmt
28  * s modified in place and not restored
29  * if nxt!=0 then it will point to the first unread char in s
30  * the number of scanned tokens is returned
31  * -1 returned if s was not empty and fmt failed to match
32  *
33  * ' ' in fmt matches 0 or more {space,tab}
34  * '\n' in fmt eats remainder of current line
35  * "..." and '...' quotes interpreted
36  * newline is equivalent to end of buf except when quoted
37  * \\ quotes following char
38  *
39  * message support for %s and %v data
40  *
41  *	(5:12345)		fixed length strings, ) may be \t
42  *	(null)			NiL
43  *
44  * "..." and '...' may span \n, and \\n is the line splice
45  * quoted '\r' translated to '\n'
46  * otherwise tokenizing is unconditionally terminated by '\n'
47  *
48  * a null arg pointer skips that arg
49  *
50  *	%c		char
51  *	%[hl]d		[short|int|long] base 10
52  *	%f		double
53  *	%g		double
54  *	%[hl]n		[short|int|long] C-style base
55  *	%[hl]o		[short|int|long] base 8
56  *	%s		string
57  *	%[hl]u		same as %[hl]n
58  *	%v		argv, elements
59  *	%[hl]x		[short|int|long] base 16
60  *
61  * unmatched char args are set to "", int args to 0
62  */
63 
64 #include <ast.h>
65 #include <tok.h>
66 
67 static char	empty[1];
68 
69 /*
70  * get one string token into p
71  */
72 
73 static char*
74 lextok(register char* s, register int c, char** p, int* n)
75 {
76 	register char*	t;
77 	register int	q;
78 	char*		b;
79 	char*		u;
80 
81 	if (*s == '(' && (!c || c == ' ' || c == '\n'))
82 	{
83 		q = strtol(s + 1, &b, 10);
84 		if (*b == ':')
85 		{
86 			if (*(t = ++b + q) == ')' || *t == '\t')
87 			{
88 				s = t;
89 				*s++ = 0;
90 				goto end;
91 			}
92 		}
93 		else if (strneq(b, "null)", 5))
94 		{
95 			s = b + 5;
96 			b = 0;
97 			goto end;
98 		}
99 	}
100 	b = s;
101 	q = 0;
102 	t = 0;
103 	for (;;)
104 	{
105 		if (!*s || !q && *s == '\n')
106 		{
107 			if (!q)
108 			{
109 				if (!c || c == ' ' || c == '\n') (*n)++;
110 				else
111 				{
112 					s = b;
113 					b = empty;
114 					break;
115 				}
116 			}
117 			if (t) *t = 0;
118 			break;
119 		}
120 		else if (*s == '\\')
121 		{
122 			u = s;
123 			if (!*++s || *s == '\n' && (!*++s || *s == '\n')) continue;
124 			if (p)
125 			{
126 				if (b == u) b = s;
127 				else if (!t) t = u;
128 			}
129 		}
130 		else if (q)
131 		{
132 			if (*s == q)
133 			{
134 				q = 0;
135 				if (!t) t = s;
136 				s++;
137 				continue;
138 			}
139 			else if (*s == '\r') *s = '\n';
140 		}
141 		else if (*s == '"' || *s == '\'')
142 		{
143 			q = *s++;
144 			if (p)
145 			{
146 				if (b == (s - 1)) b = s;
147 				else if (!t) t = s - 1;
148 			}
149 			continue;
150 		}
151 		else if (*s == c || c == ' ' && *s == '\t')
152 		{
153 			*s++ = 0;
154 			if (t) *t = 0;
155 		end:
156 			if (c == ' ') while (*s == ' ' || *s == '\t') s++;
157 			(*n)++;
158 			break;
159 		}
160 		if (t) *t++ = *s;
161 		s++;
162 	}
163 	if (p) *p = b;
164 	return(s);
165 }
166 
167 /*
168  * scan entry
169  */
170 
171 int
172 tokscan(register char* s, char** nxt, const char* fmt, ...)
173 {
174 	register int	c;
175 	register char*	f;
176 	int		num = 0;
177 	char*		skip = 0;
178 	int		q;
179 	int		onum;
180 	long		val;
181 	double		dval;
182 	va_list		ap;
183 	char*		p_char;
184 	double*		p_double;
185 	int*		p_int;
186 	long*		p_long;
187 	short*		p_short;
188 	char**		p_string;
189 	char*		prv_f = 0;
190 	va_list		prv_ap;
191 
192 	va_start(ap, fmt);
193 	if (!*s || *s == '\n')
194 	{
195 		skip = s;
196 		s = empty;
197 	}
198 	f = (char*)fmt;
199 	for (;;) switch (c = *f++)
200 	{
201 	case 0:
202 		if (f = prv_f)
203 		{
204 			prv_f = 0;
205 			/* prv_ap value is guarded by prv_f */
206 			va_copy(ap, prv_ap);
207 			continue;
208 		}
209 		goto done;
210 	case ' ':
211 		while (*s == ' ' || *s == '\t') s++;
212 		break;
213 	case '%':
214 		onum = num;
215 		switch (c = *f++)
216 		{
217 		case 'h':
218 		case 'l':
219 			q = c;
220 			c = *f++;
221 			break;
222 		default:
223 			q = 0;
224 			break;
225 		}
226 		switch (c)
227 		{
228 		case 0:
229 		case '%':
230 			f--;
231 			continue;
232 		case ':':
233 			prv_f = f;
234 			f = va_arg(ap, char*);
235 			va_copy(prv_ap, ap);
236 			va_copy(ap, va_listval(va_arg(ap, va_listarg)));
237 			continue;
238 		case 'c':
239 			p_char = va_arg(ap, char*);
240 			if (!(c = *s) || c == '\n')
241 			{
242 				if (p_char) *p_char = 0;
243 			}
244 			else
245 			{
246 				if (p_char) *p_char = c;
247 				s++;
248 				num++;
249 			}
250 			break;
251 		case 'd':
252 		case 'n':
253 		case 'o':
254 		case 'u':
255 		case 'x':
256 			switch (c)
257 			{
258 			case 'd':
259 				c = 10;
260 				break;
261 			case 'n':
262 			case 'u':
263 				c = 0;
264 				break;
265 			case 'o':
266 				c = 8;
267 				break;
268 			case 'x':
269 				c = 16;
270 				break;
271 			}
272 			if (!*s || *s == '\n')
273 			{
274 				val = 0;
275 				p_char = s;
276 			}
277 			else val = strtol(s, &p_char, c);
278 			switch (q)
279 			{
280 			case 'h':
281 				if (p_short = va_arg(ap, short*)) *p_short = (short)val;
282 				break;
283 			case 'l':
284 				if (p_long = va_arg(ap, long*)) *p_long = val;
285 				break;
286 			default:
287 				if (p_int = va_arg(ap, int*)) *p_int = (int)val;
288 				break;
289 			}
290 			if (s != p_char)
291 			{
292 				s = p_char;
293 				num++;
294 			}
295 			break;
296 		case 'f':
297 		case 'g':
298 			if (!*s || *s == '\n')
299 			{
300 				dval = 0;
301 				p_char = s;
302 			}
303 			else dval = strtod(s, &p_char);
304 			if (p_double = va_arg(ap, double*)) *p_double = dval;
305 			if (s != p_char)
306 			{
307 				s = p_char;
308 				num++;
309 			}
310 			break;
311 		case 's':
312 			p_string = va_arg(ap, char**);
313 			if (q = *f) f++;
314 			if (!*s || *s == '\n')
315 			{
316 				if (p_string) *p_string = s;
317 			}
318 			else s = lextok(s, q, p_string, &num);
319 			break;
320 		case 'v':
321 			p_string = va_arg(ap, char**);
322 			c = va_arg(ap, int);
323 			if (q = *f) f++;
324 			if ((!*s || *s == '\n') && p_string)
325 			{
326 				*p_string = 0;
327 				p_string = 0;
328 			}
329 			while (*s && *s != '\n' && --c > 0)
330 			{
331 				s = lextok(s, q, p_string, &num);
332 				if (p_string) p_string++;
333 			}
334 			if (p_string) *p_string = 0;
335 			break;
336 		}
337 		if (skip) num = onum;
338 		else if (num == onum)
339 		{
340 			if (!num) num = -1;
341 			skip = s;
342 			s = empty;
343 		}
344 		break;
345 	case '\n':
346 		goto done;
347 	default:
348 		if ((*s++ != c) && !skip)
349 		{
350 			skip = s - 1;
351 			s = empty;
352 		}
353 		break;
354 	}
355  done:
356 	va_end(ap);
357 	if (*s == '\n') *s++ = 0;
358 	if (nxt) *nxt = skip ? skip : s;
359 	return(num);
360 }
361