xref: /freebsd/lib/libc/regex/grot/main.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 #include <sys/types.h>
2 #include <assert.h>
3 #include <regex.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 
9 #include "debug.ih"
10 #include "main.ih"
11 #include "split.ih"
12 
/* Name this program was started as (argv[0]); printed in the usage message. */
char *progname;
/* Bumped once per -x; when nonzero, regress() echoes each line number. */
int debug = 0;
/* Number of the regression-input line currently being processed. */
int line = 0;
/* Exit status handed to exit(); set to 1 when a regression check fails. */
int status = 0;

/* Baseline regcomp() flags; options('c', ...) starts from this value. */
int copts = REG_EXTENDED;
/* Baseline regexec() flags; options('e', ...) starts from this value. */
int eopts = 0;
/* -S value: start of the search window used with REG_STARTEND. */
regoff_t startoff = 0;
/* -E value: how far before the end of the string the window stops. */
regoff_t endoff = 0;
22 
23 
24 /*
25  - main - do the simple case, hand off to regress() for regression
26  */
27 int
28 main(int argc, char **argv)
29 {
30 	regex_t re;
31 #	define	NS	10
32 	regmatch_t subs[NS];
33 	char erbuf[100];
34 	int err;
35 	size_t len;
36 	int c;
37 	int errflg = 0;
38 	int i;
39 	extern int optind;
40 	extern char *optarg;
41 
42 	progname = argv[0];
43 
44 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
45 		switch (c) {
46 		case 'c':	/* compile options */
47 			copts = options('c', optarg);
48 			break;
49 		case 'e':	/* execute options */
50 			eopts = options('e', optarg);
51 			break;
52 		case 'S':	/* start offset */
53 			startoff = (regoff_t)atoi(optarg);
54 			break;
55 		case 'E':	/* end offset */
56 			endoff = (regoff_t)atoi(optarg);
57 			break;
58 		case 'x':	/* Debugging. */
59 			debug++;
60 			break;
61 		case '?':
62 		default:
63 			errflg++;
64 			break;
65 		}
66 	if (errflg) {
67 		fprintf(stderr, "usage: %s ", progname);
68 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
69 		exit(2);
70 	}
71 
72 	if (optind >= argc) {
73 		regress(stdin);
74 		exit(status);
75 	}
76 
77 	err = regcomp(&re, argv[optind++], copts);
78 	if (err) {
79 		len = regerror(err, &re, erbuf, sizeof(erbuf));
80 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
81 		    eprint(err), len, sizeof(erbuf), erbuf);
82 		exit(status);
83 	}
84 	regprint(&re, stdout);
85 
86 	if (optind >= argc) {
87 		regfree(&re);
88 		exit(status);
89 	}
90 
91 	if ((eopts & REG_STARTEND) != 0) {
92 		subs[0].rm_so = startoff;
93 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
94 	}
95 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
96 	if (err) {
97 		len = regerror(err, &re, erbuf, sizeof(erbuf));
98 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
99 		    eprint(err), len, sizeof(erbuf), erbuf);
100 		exit(status);
101 	}
102 	if ((copts & REG_NOSUB) == 0) {
103 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
104 		if (subs[0].rm_so != -1) {
105 			if (len != 0)
106 				printf("match `%.*s'\n", (int)len,
107 				    argv[optind] + subs[0].rm_so);
108 			else
109 				printf("match `'@%.1s\n",
110 				    argv[optind] + subs[0].rm_so);
111 		}
112 		for (i = 1; i < NS; i++)
113 			if (subs[i].rm_so != -1)
114 				printf("(%d) `%.*s'\n", i,
115 				    (int)(subs[i].rm_eo - subs[i].rm_so),
116 				    argv[optind] + subs[i].rm_so);
117 	}
118 	exit(status);
119 }
120 
121 /*
122  - regress - main loop of regression test
123  == void regress(FILE *in);
124  */
125 void
126 regress(FILE *in)
127 {
128 	char inbuf[1000];
129 #	define	MAXF	10
130 	char *f[MAXF];
131 	int nf;
132 	int i;
133 	char erbuf[100];
134 	size_t ne;
135 	char *badpat = "invalid regular expression";
136 #	define	SHORT	10
137 	char *bpname = "REG_BADPAT";
138 	regex_t re;
139 
140 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
141 		line++;
142 		if (inbuf[0] == '#' || inbuf[0] == '\n')
143 			continue;			/* NOTE CONTINUE */
144 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
145 		if (debug)
146 			fprintf(stdout, "%d:\n", line);
147 		nf = split(inbuf, f, MAXF, "\t\t");
148 		if (nf < 3) {
149 			fprintf(stderr, "bad input, line %d\n", line);
150 			exit(1);
151 		}
152 		for (i = 0; i < nf; i++)
153 			if (strcmp(f[i], "\"\"") == 0)
154 				f[i] = "";
155 		if (nf <= 3)
156 			f[3] = NULL;
157 		if (nf <= 4)
158 			f[4] = NULL;
159 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
160 		if (opt('&', f[1]))	/* try with either type of RE */
161 			try(f[0], f[1], f[2], f[3], f[4],
162 					options('c', f[1]) &~ REG_EXTENDED);
163 	}
164 
165 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
166 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
167 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
168 							erbuf, badpat);
169 		status = 1;
170 	}
171 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
172 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
173 	    ne != strlen(badpat)+1) {
174 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
175 						erbuf, SHORT-1, badpat);
176 		status = 1;
177 	}
178 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
179 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
180 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
181 						erbuf, bpname);
182 		status = 1;
183 	}
184 	re.re_endp = bpname;
185 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
186 	if (atoi(erbuf) != (int)REG_BADPAT) {
187 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
188 						erbuf, (long)REG_BADPAT);
189 		status = 1;
190 	} else if (ne != strlen(erbuf) + 1) {
191 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
192 						erbuf, (long)REG_BADPAT);
193 		status = 1;
194 	}
195 }
196 
197 /*
198  - try - try it, and report on problems
199  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
200  - opts: may not match f1
201  */
202 void
203 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
204 {
205 	regex_t re;
206 #	define	NSUBS	10
207 	regmatch_t subs[NSUBS];
208 #	define	NSHOULD	15
209 	char *should[NSHOULD];
210 	char erbuf[100];
211 	size_t len;
212 	int err, i, nshould;
213 	char *grump;
214 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
215 	char f0copy[1000];
216 	char f2copy[1000];
217 
218 	strcpy(f0copy, f0);
219 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
220 	fixstr(f0copy);
221 	err = regcomp(&re, f0copy, opts);
222 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
223 		/* unexpected error or wrong error */
224 		len = regerror(err, &re, erbuf, sizeof(erbuf));
225 		fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
226 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
227 		status = 1;
228 	} else if (err == 0 && opt('C', f1)) {
229 		/* unexpected success */
230 		fprintf(stderr, "%d: %s should have given REG_%s\n",
231 						line, type, f2);
232 		status = 1;
233 		err = 1;	/* so we won't try regexec */
234 	}
235 
236 	if (err != 0) {
237 		regfree(&re);
238 		return;
239 	}
240 
241 	strcpy(f2copy, f2);
242 	fixstr(f2copy);
243 
244 	if (options('e', f1)&REG_STARTEND) {
245 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
246 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
247 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
248 		subs[0].rm_eo = strchr(f2, ')') - f2;
249 	}
250 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
251 
252 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
253 		/* unexpected error or wrong error */
254 		len = regerror(err, &re, erbuf, sizeof(erbuf));
255 		fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
256 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
257 		status = 1;
258 	} else if (err != 0) {
259 		/* nothing more to check */
260 	} else if (f3 == NULL) {
261 		/* unexpected success */
262 		fprintf(stderr, "%d: %s exec should have failed\n",
263 		    line, type);
264 		status = 1;
265 		err = 1;		/* just on principle */
266 	} else if (opts&REG_NOSUB) {
267 		/* nothing more to check */
268 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
269 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
270 		status = 1;
271 		err = 1;
272 	}
273 
274 	if (err != 0 || f4 == NULL) {
275 		regfree(&re);
276 		return;
277 	}
278 
279 	for (i = 1; i < NSHOULD; i++)
280 		should[i] = NULL;
281 	nshould = split(f4, should+1, NSHOULD-1, ",");
282 	if (nshould == 0) {
283 		nshould = 1;
284 		should[1] = "";
285 	}
286 	for (i = 1; i < NSUBS; i++) {
287 		grump = check(f2, subs[i], should[i]);
288 		if (grump != NULL) {
289 			fprintf(stderr, "%d: %s $%d %s\n", line,
290 			    type, i, grump);
291 			status = 1;
292 			err = 1;
293 		}
294 	}
295 
296 	regfree(&re);
297 }
298 
299 /*
300  - options - pick options out of a regression-test string
301  - type: 'c' - compile, 'e' - exec
302  == int options(int type, char *s);
303  */
304 int
305 options(int type, char *s)
306 {
307 	char *p;
308 	int o = (type == 'c') ? copts : eopts;
309 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
310 
311 	for (p = s; *p != '\0'; p++)
312 		if (strchr(legal, *p) != NULL)
313 			switch (*p) {
314 			case 'b':
315 				o &= ~REG_EXTENDED;
316 				break;
317 			case 'i':
318 				o |= REG_ICASE;
319 				break;
320 			case 's':
321 				o |= REG_NOSUB;
322 				break;
323 			case 'n':
324 				o |= REG_NEWLINE;
325 				break;
326 			case 'm':
327 				o &= ~REG_EXTENDED;
328 				o |= REG_NOSPEC;
329 				break;
330 			case 'p':
331 				o |= REG_PEND;
332 				break;
333 			case '^':
334 				o |= REG_NOTBOL;
335 				break;
336 			case '$':
337 				o |= REG_NOTEOL;
338 				break;
339 			case '#':
340 				o |= REG_STARTEND;
341 				break;
342 			case 't':	/* trace */
343 				o |= REG_TRACE;
344 				break;
345 			case 'l':	/* force long representation */
346 				o |= REG_LARGE;
347 				break;
348 			case 'r':	/* force backref use */
349 				o |= REG_BACKR;
350 				break;
351 			}
352 	return(o);
353 }
354 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 - returns: 1 if strchr() finds c in s, 0 otherwise
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	return (hit != NULL) ? 1 : 0;
}
364 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 - rewrites p in place: N becomes newline, T tab, S space, Z a NUL byte
 - scanning carries on past a NUL produced from Z and stops at the first
   NUL that was already in the string; a NULL p is ignored
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	cur = p;
	while (*cur != '\0') {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
		cur++;
	}
}
385 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 - str: the string that was matched against
 - sub: offsets reported for the (sub)match; -1/-1 means no match
 - should: expected text; NULL or "-" means no match is expected, and
   "@xyz" means an empty match located where the string continues "xyz"
   ("@" alone: at the end of the string)
 - returns: NULL if all is well, otherwise a complaint that is either a
   string literal or a static buffer overwritten by the next call
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * anything that looks inside should
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
459 
460 /*
461  - eprint - convert error number to name
462  == static char *eprint(int err);
463  */
464 static char *
465 eprint(int err)
466 {
467 	static char epbuf[100];
468 	size_t len;
469 
470 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
471 	assert(len <= sizeof(epbuf));
472 	return(epbuf);
473 }
474 
475 /*
476  - efind - convert error name to number
477  == static int efind(char *name);
478  */
479 static int
480 efind(char *name)
481 {
482 	static char efbuf[100];
483 	size_t n;
484 	regex_t re;
485 
486 	sprintf(efbuf, "REG_%s", name);
487 	assert(strlen(efbuf) < sizeof(efbuf));
488 	re.re_endp = efbuf;
489 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
490 	return(atoi(efbuf));
491 }
492