xref: /freebsd/lib/libc/regex/grot/main.c (revision 9f44a47fd07924afc035991af15d84e6585dea4f)
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3 
4 #include <sys/types.h>
5 #include <assert.h>
6 #include <regex.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 
12 #include "debug.ih"
13 #include "main.ih"
14 #include "split.ih"
15 
char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* debug level; incremented once per -x flag */
int line = 0;			/* number of the regression input line being processed */
int status = 0;			/* exit status; set to 1 when a regression check fails */

int copts = REG_EXTENDED;	/* regcomp() flags; also the base that options('c', ...) modifies */
int eopts = 0;			/* regexec() flags; also the base that options('e', ...) modifies */
regoff_t startoff = 0;		/* -S value: subs[0].rm_so when main() uses REG_STARTEND */
regoff_t endoff = 0;		/* -E value: subtracted from the string length to get subs[0].rm_eo */
25 
26 
27 /*
28  - main - do the simple case, hand off to regress() for regression
29  */
30 int
31 main(int argc, char **argv)
32 {
33 	regex_t re;
34 #	define	NS	10
35 	regmatch_t subs[NS];
36 	char erbuf[100];
37 	int err;
38 	size_t len;
39 	int c;
40 	int errflg = 0;
41 	int i;
42 	extern int optind;
43 	extern char *optarg;
44 
45 	progname = argv[0];
46 
47 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
48 		switch (c) {
49 		case 'c':	/* compile options */
50 			copts = options('c', optarg);
51 			break;
52 		case 'e':	/* execute options */
53 			eopts = options('e', optarg);
54 			break;
55 		case 'S':	/* start offset */
56 			startoff = (regoff_t)atoi(optarg);
57 			break;
58 		case 'E':	/* end offset */
59 			endoff = (regoff_t)atoi(optarg);
60 			break;
61 		case 'x':	/* Debugging. */
62 			debug++;
63 			break;
64 		case '?':
65 		default:
66 			errflg++;
67 			break;
68 		}
69 	if (errflg) {
70 		fprintf(stderr, "usage: %s ", progname);
71 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
72 		exit(2);
73 	}
74 
75 	if (optind >= argc) {
76 		regress(stdin);
77 		exit(status);
78 	}
79 
80 	err = regcomp(&re, argv[optind++], copts);
81 	if (err) {
82 		len = regerror(err, &re, erbuf, sizeof(erbuf));
83 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
84 		    eprint(err), len, sizeof(erbuf), erbuf);
85 		exit(status);
86 	}
87 	regprint(&re, stdout);
88 
89 	if (optind >= argc) {
90 		regfree(&re);
91 		exit(status);
92 	}
93 
94 	if ((eopts & REG_STARTEND) != 0) {
95 		subs[0].rm_so = startoff;
96 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
97 	}
98 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
99 	if (err) {
100 		len = regerror(err, &re, erbuf, sizeof(erbuf));
101 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
102 		    eprint(err), len, sizeof(erbuf), erbuf);
103 		exit(status);
104 	}
105 	if ((copts & REG_NOSUB) == 0) {
106 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
107 		if (subs[0].rm_so != -1) {
108 			if (len != 0)
109 				printf("match `%.*s'\n", (int)len,
110 				    argv[optind] + subs[0].rm_so);
111 			else
112 				printf("match `'@%.1s\n",
113 				    argv[optind] + subs[0].rm_so);
114 		}
115 		for (i = 1; i < NS; i++)
116 			if (subs[i].rm_so != -1)
117 				printf("(%d) `%.*s'\n", i,
118 				    (int)(subs[i].rm_eo - subs[i].rm_so),
119 				    argv[optind] + subs[i].rm_so);
120 	}
121 	exit(status);
122 }
123 
124 /*
125  - regress - main loop of regression test
126  == void regress(FILE *in);
127  */
128 void
129 regress(FILE *in)
130 {
131 	char inbuf[1000];
132 #	define	MAXF	10
133 	char *f[MAXF];
134 	int nf;
135 	int i;
136 	char erbuf[100];
137 	size_t ne;
138 	char *badpat = "invalid regular expression";
139 #	define	SHORT	10
140 	char *bpname = "REG_BADPAT";
141 	regex_t re;
142 
143 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
144 		line++;
145 		if (inbuf[0] == '#' || inbuf[0] == '\n')
146 			continue;			/* NOTE CONTINUE */
147 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
148 		if (debug)
149 			fprintf(stdout, "%d:\n", line);
150 		nf = split(inbuf, f, MAXF, "\t\t");
151 		if (nf < 3) {
152 			fprintf(stderr, "bad input, line %d\n", line);
153 			exit(1);
154 		}
155 		for (i = 0; i < nf; i++)
156 			if (strcmp(f[i], "\"\"") == 0)
157 				f[i] = "";
158 		if (nf <= 3)
159 			f[3] = NULL;
160 		if (nf <= 4)
161 			f[4] = NULL;
162 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
163 		if (opt('&', f[1]))	/* try with either type of RE */
164 			try(f[0], f[1], f[2], f[3], f[4],
165 					options('c', f[1]) &~ REG_EXTENDED);
166 	}
167 
168 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
169 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
170 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
171 							erbuf, badpat);
172 		status = 1;
173 	}
174 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
175 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
176 	    ne != strlen(badpat)+1) {
177 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
178 						erbuf, SHORT-1, badpat);
179 		status = 1;
180 	}
181 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
182 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
183 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
184 						erbuf, bpname);
185 		status = 1;
186 	}
187 	re.re_endp = bpname;
188 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
189 	if (atoi(erbuf) != (int)REG_BADPAT) {
190 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
191 						erbuf, (long)REG_BADPAT);
192 		status = 1;
193 	} else if (ne != strlen(erbuf) + 1) {
194 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
195 						erbuf, (long)REG_BADPAT);
196 		status = 1;
197 	}
198 }
199 
200 /*
201  - try - try it, and report on problems
202  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
203  - opts: may not match f1
204  */
205 void
206 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
207 {
208 	regex_t re;
209 #	define	NSUBS	10
210 	regmatch_t subs[NSUBS];
211 #	define	NSHOULD	15
212 	char *should[NSHOULD];
213 	char erbuf[100];
214 	size_t len;
215 	int err, i, nshould;
216 	char *grump;
217 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
218 	char f0copy[1000];
219 	char f2copy[1000];
220 
221 	strcpy(f0copy, f0);
222 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
223 	fixstr(f0copy);
224 	err = regcomp(&re, f0copy, opts);
225 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
226 		/* unexpected error or wrong error */
227 		len = regerror(err, &re, erbuf, sizeof(erbuf));
228 		fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
229 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
230 		status = 1;
231 	} else if (err == 0 && opt('C', f1)) {
232 		/* unexpected success */
233 		fprintf(stderr, "%d: %s should have given REG_%s\n",
234 						line, type, f2);
235 		status = 1;
236 		err = 1;	/* so we won't try regexec */
237 	}
238 
239 	if (err != 0) {
240 		regfree(&re);
241 		return;
242 	}
243 
244 	strcpy(f2copy, f2);
245 	fixstr(f2copy);
246 
247 	if (options('e', f1)&REG_STARTEND) {
248 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
249 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
250 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
251 		subs[0].rm_eo = strchr(f2, ')') - f2;
252 	}
253 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
254 
255 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
256 		/* unexpected error or wrong error */
257 		len = regerror(err, &re, erbuf, sizeof(erbuf));
258 		fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
259 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
260 		status = 1;
261 	} else if (err != 0) {
262 		/* nothing more to check */
263 	} else if (f3 == NULL) {
264 		/* unexpected success */
265 		fprintf(stderr, "%d: %s exec should have failed\n",
266 		    line, type);
267 		status = 1;
268 		err = 1;		/* just on principle */
269 	} else if (opts&REG_NOSUB) {
270 		/* nothing more to check */
271 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
272 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
273 		status = 1;
274 		err = 1;
275 	}
276 
277 	if (err != 0 || f4 == NULL) {
278 		regfree(&re);
279 		return;
280 	}
281 
282 	for (i = 1; i < NSHOULD; i++)
283 		should[i] = NULL;
284 	nshould = split(f4, should+1, NSHOULD-1, ",");
285 	if (nshould == 0) {
286 		nshould = 1;
287 		should[1] = "";
288 	}
289 	for (i = 1; i < NSUBS; i++) {
290 		grump = check(f2, subs[i], should[i]);
291 		if (grump != NULL) {
292 			fprintf(stderr, "%d: %s $%d %s\n", line,
293 			    type, i, grump);
294 			status = 1;
295 			err = 1;
296 		}
297 	}
298 
299 	regfree(&re);
300 }
301 
302 /*
303  - options - pick options out of a regression-test string
304  - type: 'c' - compile, 'e' - exec
305  == int options(int type, char *s);
306  */
307 int
308 options(int type, char *s)
309 {
310 	char *p;
311 	int o = (type == 'c') ? copts : eopts;
312 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
313 
314 	for (p = s; *p != '\0'; p++)
315 		if (strchr(legal, *p) != NULL)
316 			switch (*p) {
317 			case 'b':
318 				o &= ~REG_EXTENDED;
319 				break;
320 			case 'i':
321 				o |= REG_ICASE;
322 				break;
323 			case 's':
324 				o |= REG_NOSUB;
325 				break;
326 			case 'n':
327 				o |= REG_NEWLINE;
328 				break;
329 			case 'm':
330 				o &= ~REG_EXTENDED;
331 				o |= REG_NOSPEC;
332 				break;
333 			case 'p':
334 				o |= REG_PEND;
335 				break;
336 			case '^':
337 				o |= REG_NOTBOL;
338 				break;
339 			case '$':
340 				o |= REG_NOTEOL;
341 				break;
342 			case '#':
343 				o |= REG_STARTEND;
344 				break;
345 			case 't':	/* trace */
346 				o |= REG_TRACE;
347 				break;
348 			case 'l':	/* force long representation */
349 				o |= REG_LARGE;
350 				break;
351 			case 'r':	/* force backref use */
352 				o |= REG_BACKR;
353 				break;
354 			}
355 	return(o);
356 }
357 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when the character `c' occurs in the string `s' and 0 when
 * it does not.  The search is done with strchr().
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
367 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: 'N' becomes a newline, 'T' a tab, 'S' a
 * space and 'Z' a NUL byte.  The scan stops only at a NUL that was
 * already present in the input, so characters that follow a 'Z' are
 * still translated.  A NULL pointer is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	cur = p;
	while (*cur != '\0') {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
		/* step first, test afterwards: a NUL just written does not stop us */
		cur++;
	}
}
388 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match `sub', reported for the string `str', against the
 * expectation `should':
 *	NULL or "-"	no match is expected
 *	"@text"		an empty match is expected, located where the
 *			rest of str begins with text ("@" alone means at
 *			the end of str)
 *	anything else	the exact text that must have been matched
 *
 * Returns NULL when the match is as expected, otherwise a complaint.
 * The complaint is either a string literal or a static buffer that the
 * next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; both offsets are known to be >= 0 here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * anything that looks at the contents of `should'
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		/* the matched text can be longer than grump; let it truncate */
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
462 
463 /*
464  - eprint - convert error number to name
465  == static char *eprint(int err);
466  */
467 static char *
468 eprint(int err)
469 {
470 	static char epbuf[100];
471 	size_t len;
472 
473 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
474 	assert(len <= sizeof(epbuf));
475 	return(epbuf);
476 }
477 
478 /*
479  - efind - convert error name to number
480  == static int efind(char *name);
481  */
482 static int
483 efind(char *name)
484 {
485 	static char efbuf[100];
486 	size_t n;
487 	regex_t re;
488 
489 	sprintf(efbuf, "REG_%s", name);
490 	assert(strlen(efbuf) < sizeof(efbuf));
491 	re.re_endp = efbuf;
492 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
493 	return(atoi(efbuf));
494 }
495