xref: /freebsd/contrib/netbsd-tests/lib/libc/regex/main.c (revision 0b3105a37d7adcadcb720112fed4dc4e8040be99)
1 /*	$NetBSD: main.c,v 1.2 2011/09/16 16:13:18 plunky Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <assert.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include <sys/types.h>
37 
38 #include "test_regex.h"
39 
char *progname;		/* argv[0], set in main(); used in the usage message */
int debug = 0;		/* bumped by -x; regress() echoes line numbers when set */
int line = 0;		/* current input line number, counted by regress() */
int status = 0;		/* exit status; set to 1 when a test reports a failure */

int copts = REG_EXTENDED;	/* base regcomp() flags; options('c', ...) starts here */
int eopts = 0;			/* base regexec() flags; options('e', ...) starts here */
regoff_t startoff = 0;		/* -S: REG_STARTEND start offset used by main() */
regoff_t endoff = 0;		/* -E: subtracted from the string length in main() */

/* Shared empty string, substituted for `""' fields and empty expectations. */
static char empty = '\0';

static char *eprint(int);
static int efind(char *);
54 
55 /*
56  * main - do the simple case, hand off to regress() for regression
57  */
58 int
59 main(int argc, char *argv[])
60 {
61 	regex_t re;
62 #	define	NS	10
63 	regmatch_t subs[NS];
64 	char erbuf[100];
65 	int err;
66 	size_t len;
67 	int c;
68 	int errflg = 0;
69 	int i;
70 	extern int optind;
71 	extern char *optarg;
72 
73 	progname = argv[0];
74 
75 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
76 		switch (c) {
77 		case 'c':	/* compile options */
78 			copts = options('c', optarg);
79 			break;
80 		case 'e':	/* execute options */
81 			eopts = options('e', optarg);
82 			break;
83 		case 'S':	/* start offset */
84 			startoff = (regoff_t)atoi(optarg);
85 			break;
86 		case 'E':	/* end offset */
87 			endoff = (regoff_t)atoi(optarg);
88 			break;
89 		case 'x':	/* Debugging. */
90 			debug++;
91 			break;
92 		case '?':
93 		default:
94 			errflg++;
95 			break;
96 		}
97 	if (errflg) {
98 		fprintf(stderr, "usage: %s ", progname);
99 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
100 		exit(2);
101 	}
102 
103 	if (optind >= argc) {
104 		regress(stdin);
105 		exit(status);
106 	}
107 
108 	err = regcomp(&re, argv[optind++], copts);
109 	if (err) {
110 		len = regerror(err, &re, erbuf, sizeof(erbuf));
111 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
112 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
113 		exit(status);
114 	}
115 	regprint(&re, stdout);
116 
117 	if (optind >= argc) {
118 		regfree(&re);
119 		exit(status);
120 	}
121 
122 	if (eopts&REG_STARTEND) {
123 		subs[0].rm_so = startoff;
124 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
125 	}
126 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
127 	if (err) {
128 		len = regerror(err, &re, erbuf, sizeof(erbuf));
129 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
130 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
131 		exit(status);
132 	}
133 	if (!(copts&REG_NOSUB)) {
134 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
135 		if (subs[0].rm_so != -1) {
136 			if (len != 0)
137 				printf("match `%.*s'\n", (int)len,
138 					argv[optind] + subs[0].rm_so);
139 			else
140 				printf("match `'@%.1s\n",
141 					argv[optind] + subs[0].rm_so);
142 		}
143 		for (i = 1; i < NS; i++)
144 			if (subs[i].rm_so != -1)
145 				printf("(%d) `%.*s'\n", i,
146 					(int)(subs[i].rm_eo - subs[i].rm_so),
147 					argv[optind] + subs[i].rm_so);
148 	}
149 	exit(status);
150 }
151 
152 /*
153  * regress - main loop of regression test
154  */
155 void
156 regress(FILE *in)
157 {
158 	char inbuf[1000];
159 #	define	MAXF	10
160 	char *f[MAXF];
161 	int nf;
162 	int i;
163 	char erbuf[100];
164 	size_t ne;
165 	const char *badpat = "invalid regular expression";
166 #	define	SHORT	10
167 	const char *bpname = "REG_BADPAT";
168 	regex_t re;
169 
170 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
171 		line++;
172 		if (inbuf[0] == '#' || inbuf[0] == '\n')
173 			continue;			/* NOTE CONTINUE */
174 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
175 		if (debug)
176 			fprintf(stdout, "%d:\n", line);
177 		nf = split(inbuf, f, MAXF, "\t\t");
178 		if (nf < 3) {
179 			fprintf(stderr, "bad input, line %d\n", line);
180 			exit(1);
181 		}
182 		for (i = 0; i < nf; i++)
183 			if (strcmp(f[i], "\"\"") == 0)
184 				f[i] = &empty;
185 		if (nf <= 3)
186 			f[3] = NULL;
187 		if (nf <= 4)
188 			f[4] = NULL;
189 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
190 		if (opt('&', f[1]))	/* try with either type of RE */
191 			try(f[0], f[1], f[2], f[3], f[4],
192 					options('c', f[1]) &~ REG_EXTENDED);
193 	}
194 
195 	ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
196 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
197 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
198 							erbuf, badpat);
199 		status = 1;
200 	}
201 	ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
202 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
203 						ne != strlen(badpat)+1) {
204 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
205 						erbuf, SHORT-1, badpat);
206 		status = 1;
207 	}
208 	ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
209 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
210 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
211 						erbuf, bpname);
212 		status = 1;
213 	}
214 	re.re_endp = bpname;
215 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
216 	if (atoi(erbuf) != (int)REG_BADPAT) {
217 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
218 						erbuf, (long)REG_BADPAT);
219 		status = 1;
220 	} else if (ne != strlen(erbuf)+1) {
221 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
222 						erbuf, (long)REG_BADPAT);
223 		status = 1;
224 	}
225 }
226 
227 /*
228  - try - try it, and report on problems
229  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
230  */
231 void
232 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
233 {
234 	regex_t re;
235 #	define	NSUBS	10
236 	regmatch_t subs[NSUBS];
237 #	define	NSHOULD	15
238 	char *should[NSHOULD];
239 	int nshould;
240 	char erbuf[100];
241 	int err;
242 	int len;
243 	const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
244 	int i;
245 	char *grump;
246 	char f0copy[1000];
247 	char f2copy[1000];
248 
249 	strcpy(f0copy, f0);
250 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
251 	fixstr(f0copy);
252 	err = regcomp(&re, f0copy, opts);
253 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
254 		/* unexpected error or wrong error */
255 		len = regerror(err, &re, erbuf, sizeof(erbuf));
256 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
257 					line, type, eprint(err), len,
258 					(int)sizeof(erbuf), erbuf);
259 		status = 1;
260 	} else if (err == 0 && opt('C', f1)) {
261 		/* unexpected success */
262 		fprintf(stderr, "%d: %s should have given REG_%s\n",
263 						line, type, f2);
264 		status = 1;
265 		err = 1;	/* so we won't try regexec */
266 	}
267 
268 	if (err != 0) {
269 		regfree(&re);
270 		return;
271 	}
272 
273 	strcpy(f2copy, f2);
274 	fixstr(f2copy);
275 
276 	if (options('e', f1)&REG_STARTEND) {
277 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
278 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
279 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
280 		subs[0].rm_eo = strchr(f2, ')') - f2;
281 	}
282 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
283 
284 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
285 		/* unexpected error or wrong error */
286 		len = regerror(err, &re, erbuf, sizeof(erbuf));
287 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
288 					line, type, eprint(err), len,
289 					(int)sizeof(erbuf), erbuf);
290 		status = 1;
291 	} else if (err != 0) {
292 		/* nothing more to check */
293 	} else if (f3 == NULL) {
294 		/* unexpected success */
295 		fprintf(stderr, "%d: %s exec should have failed\n",
296 						line, type);
297 		status = 1;
298 		err = 1;		/* just on principle */
299 	} else if (opts&REG_NOSUB) {
300 		/* nothing more to check */
301 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
302 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
303 		status = 1;
304 		err = 1;
305 	}
306 
307 	if (err != 0 || f4 == NULL) {
308 		regfree(&re);
309 		return;
310 	}
311 
312 	for (i = 1; i < NSHOULD; i++)
313 		should[i] = NULL;
314 	nshould = split(f4, &should[1], NSHOULD-1, ",");
315 	if (nshould == 0) {
316 		nshould = 1;
317 		should[1] = &empty;
318 	}
319 	for (i = 1; i < NSUBS; i++) {
320 		grump = check(f2, subs[i], should[i]);
321 		if (grump != NULL) {
322 			fprintf(stderr, "%d: %s $%d %s\n", line,
323 							type, i, grump);
324 			status = 1;
325 			err = 1;
326 		}
327 	}
328 
329 	regfree(&re);
330 }
331 
332 /*
333  - options - pick options out of a regression-test string
334  == int options(int type, char *s);
335  */
336 int
337 options(int type, char *s)
338 {
339 	char *p;
340 	int o = (type == 'c') ? copts : eopts;
341 	const char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
342 
343 	for (p = s; *p != '\0'; p++)
344 		if (strchr(legal, *p) != NULL)
345 			switch (*p) {
346 			case 'b':
347 				o &= ~REG_EXTENDED;
348 				break;
349 			case 'i':
350 				o |= REG_ICASE;
351 				break;
352 			case 's':
353 				o |= REG_NOSUB;
354 				break;
355 			case 'n':
356 				o |= REG_NEWLINE;
357 				break;
358 			case 'm':
359 				o &= ~REG_EXTENDED;
360 				o |= REG_NOSPEC;
361 				break;
362 			case 'p':
363 				o |= REG_PEND;
364 				break;
365 			case '^':
366 				o |= REG_NOTBOL;
367 				break;
368 			case '$':
369 				o |= REG_NOTEOL;
370 				break;
371 			case '#':
372 				o |= REG_STARTEND;
373 				break;
374 			case 't':	/* trace */
375 				o |= REG_TRACE;
376 				break;
377 			case 'l':	/* force long representation */
378 				o |= REG_LARGE;
379 				break;
380 			case 'r':	/* force backref use */
381 				o |= REG_BACKR;
382 				break;
383 			}
384 	return(o);
385 }
386 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if character `c' occurs in `s', 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	return (hit != NULL) ? 1 : 0;
}
396 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' NUL.  A NULL pointer is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	/*
	 * The loop test looks at the character after the one just
	 * rewritten, so a NUL produced from 'Z' does not end the scan;
	 * only a NUL that was already in the string does.
	 */
	for (cp = p; *cp != '\0'; cp++) {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
	}
}
417 
418 /*
419  * check - check a substring match
420  */
421 char *				/* NULL or complaint */
422 check(char *str, regmatch_t sub, char *should)
423 {
424 	int len;
425 	int shlen;
426 	char *p;
427 	static char grump[500];
428 	char *at = NULL;
429 
430 	if (should != NULL && strcmp(should, "-") == 0)
431 		should = NULL;
432 	if (should != NULL && should[0] == '@') {
433 		at = should + 1;
434 		should = &empty;
435 	}
436 
437 	/* check rm_so and rm_eo for consistency */
438 	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
439 				(sub.rm_so != -1 && sub.rm_eo == -1) ||
440 				(sub.rm_so != -1 && sub.rm_so < 0) ||
441 				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
442 		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
443 							(long)sub.rm_eo);
444 		return(grump);
445 	}
446 
447 	/* check for no match */
448 	if (sub.rm_so == -1) {
449 		if (should == NULL)
450 			return(NULL);
451 		else {
452 			sprintf(grump, "did not match");
453 			return(grump);
454 		}
455 	}
456 
457 	/* check for in range */
458 	if (sub.rm_eo > (ssize_t)strlen(str)) {
459 		sprintf(grump, "start %ld end %ld, past end of string",
460 					(long)sub.rm_so, (long)sub.rm_eo);
461 		return(grump);
462 	}
463 
464 	len = (int)(sub.rm_eo - sub.rm_so);
465 	p = str + sub.rm_so;
466 
467 	/* check for not supposed to match */
468 	if (should == NULL) {
469 		sprintf(grump, "matched `%.*s'", len, p);
470 		return(grump);
471 	}
472 
473 	/* check for wrong match */
474 	shlen = (int)strlen(should);
475 	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
476 		sprintf(grump, "matched `%.*s' instead", len, p);
477 		return(grump);
478 	}
479 	if (shlen > 0)
480 		return(NULL);
481 
482 	/* check null match in right place */
483 	if (at == NULL)
484 		return(NULL);
485 	shlen = strlen(at);
486 	if (shlen == 0)
487 		shlen = 1;	/* force check for end-of-string */
488 	if (strncmp(p, at, shlen) != 0) {
489 		sprintf(grump, "matched null at `%.20s'", p);
490 		return(grump);
491 	}
492 	return(NULL);
493 }
494 
495 /*
496  * eprint - convert error number to name
497  */
498 static char *
499 eprint(int err)
500 {
501 	static char epbuf[100];
502 	size_t len;
503 
504 	len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
505 	assert(len <= sizeof(epbuf));
506 	return(epbuf);
507 }
508 
509 /*
510  * efind - convert error name to number
511  */
512 static int
513 efind(char *name)
514 {
515 	static char efbuf[100];
516 	regex_t re;
517 
518 	sprintf(efbuf, "REG_%s", name);
519 	assert(strlen(efbuf) < sizeof(efbuf));
520 	re.re_endp = efbuf;
521 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
522 	return(atoi(efbuf));
523 }
524