xref: /freebsd/lib/libc/regex/grot/main.c (revision 2a9021898c4ee2154787da862c238cfeccd655df)
1 #include <sys/cdefs.h>
2 #include <sys/types.h>
3 #include <assert.h>
4 #include <regex.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9 
10 #include "debug.ih"
11 #include "main.ih"
12 #include "split.ih"
13 
/* State shared by main(), regress() and try(). */
char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* bumped once for every -x given */
int line = 0;			/* number of the input line regress() is on */
int status = 0;			/* value handed to exit(); 1 after a failure */

/* Defaults that the command-line options adjust. */
int copts = REG_EXTENDED;	/* regcomp() flags; -c rewrites them */
int eopts = 0;			/* regexec() flags; -e rewrites them */
regoff_t startoff = 0;		/* -S: rm_so used with REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from strlen() for rm_eo */
23 
24 
25 /*
26  - main - do the simple case, hand off to regress() for regression
27  */
28 int
29 main(int argc, char **argv)
30 {
31 	regex_t re;
32 #	define	NS	10
33 	regmatch_t subs[NS];
34 	char erbuf[100];
35 	int err;
36 	size_t len;
37 	int c;
38 	int errflg = 0;
39 	int i;
40 	extern int optind;
41 	extern char *optarg;
42 
43 	progname = argv[0];
44 
45 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
46 		switch (c) {
47 		case 'c':	/* compile options */
48 			copts = options('c', optarg);
49 			break;
50 		case 'e':	/* execute options */
51 			eopts = options('e', optarg);
52 			break;
53 		case 'S':	/* start offset */
54 			startoff = (regoff_t)atoi(optarg);
55 			break;
56 		case 'E':	/* end offset */
57 			endoff = (regoff_t)atoi(optarg);
58 			break;
59 		case 'x':	/* Debugging. */
60 			debug++;
61 			break;
62 		case '?':
63 		default:
64 			errflg++;
65 			break;
66 		}
67 	if (errflg) {
68 		fprintf(stderr, "usage: %s ", progname);
69 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
70 		exit(2);
71 	}
72 
73 	if (optind >= argc) {
74 		regress(stdin);
75 		exit(status);
76 	}
77 
78 	err = regcomp(&re, argv[optind++], copts);
79 	if (err) {
80 		len = regerror(err, &re, erbuf, sizeof(erbuf));
81 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
82 		    eprint(err), len, sizeof(erbuf), erbuf);
83 		exit(status);
84 	}
85 	regprint(&re, stdout);
86 
87 	if (optind >= argc) {
88 		regfree(&re);
89 		exit(status);
90 	}
91 
92 	if ((eopts & REG_STARTEND) != 0) {
93 		subs[0].rm_so = startoff;
94 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
95 	}
96 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
97 	if (err) {
98 		len = regerror(err, &re, erbuf, sizeof(erbuf));
99 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
100 		    eprint(err), len, sizeof(erbuf), erbuf);
101 		exit(status);
102 	}
103 	if ((copts & REG_NOSUB) == 0) {
104 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
105 		if (subs[0].rm_so != -1) {
106 			if (len != 0)
107 				printf("match `%.*s'\n", (int)len,
108 				    argv[optind] + subs[0].rm_so);
109 			else
110 				printf("match `'@%.1s\n",
111 				    argv[optind] + subs[0].rm_so);
112 		}
113 		for (i = 1; i < NS; i++)
114 			if (subs[i].rm_so != -1)
115 				printf("(%d) `%.*s'\n", i,
116 				    (int)(subs[i].rm_eo - subs[i].rm_so),
117 				    argv[optind] + subs[i].rm_so);
118 	}
119 	exit(status);
120 }
121 
122 /*
123  - regress - main loop of regression test
124  == void regress(FILE *in);
125  */
126 void
127 regress(FILE *in)
128 {
129 	char inbuf[1000];
130 #	define	MAXF	10
131 	char *f[MAXF];
132 	int nf;
133 	int i;
134 	char erbuf[100];
135 	size_t ne;
136 	char *badpat = "invalid regular expression";
137 #	define	SHORT	10
138 	char *bpname = "REG_BADPAT";
139 	regex_t re;
140 
141 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
142 		line++;
143 		if (inbuf[0] == '#' || inbuf[0] == '\n')
144 			continue;			/* NOTE CONTINUE */
145 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
146 		if (debug)
147 			fprintf(stdout, "%d:\n", line);
148 		nf = split(inbuf, f, MAXF, "\t\t");
149 		if (nf < 3) {
150 			fprintf(stderr, "bad input, line %d\n", line);
151 			exit(1);
152 		}
153 		for (i = 0; i < nf; i++)
154 			if (strcmp(f[i], "\"\"") == 0)
155 				f[i] = "";
156 		if (nf <= 3)
157 			f[3] = NULL;
158 		if (nf <= 4)
159 			f[4] = NULL;
160 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
161 		if (opt('&', f[1]))	/* try with either type of RE */
162 			try(f[0], f[1], f[2], f[3], f[4],
163 					options('c', f[1]) &~ REG_EXTENDED);
164 	}
165 
166 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
167 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
168 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
169 							erbuf, badpat);
170 		status = 1;
171 	}
172 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
173 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
174 	    ne != strlen(badpat)+1) {
175 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
176 						erbuf, SHORT-1, badpat);
177 		status = 1;
178 	}
179 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
180 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
181 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
182 						erbuf, bpname);
183 		status = 1;
184 	}
185 	re.re_endp = bpname;
186 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
187 	if (atoi(erbuf) != (int)REG_BADPAT) {
188 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
189 						erbuf, (long)REG_BADPAT);
190 		status = 1;
191 	} else if (ne != strlen(erbuf) + 1) {
192 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
193 						erbuf, (long)REG_BADPAT);
194 		status = 1;
195 	}
196 }
197 
198 /*
199  - try - try it, and report on problems
200  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
201  - opts: may not match f1
202  */
203 void
204 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
205 {
206 	regex_t re;
207 #	define	NSUBS	10
208 	regmatch_t subs[NSUBS];
209 #	define	NSHOULD	15
210 	char *should[NSHOULD];
211 	char erbuf[100];
212 	size_t len;
213 	int err, i, nshould;
214 	char *grump;
215 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
216 	char f0copy[1000];
217 	char f2copy[1000];
218 
219 	strcpy(f0copy, f0);
220 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
221 	fixstr(f0copy);
222 	err = regcomp(&re, f0copy, opts);
223 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
224 		/* unexpected error or wrong error */
225 		len = regerror(err, &re, erbuf, sizeof(erbuf));
226 		fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
227 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
228 		status = 1;
229 	} else if (err == 0 && opt('C', f1)) {
230 		/* unexpected success */
231 		fprintf(stderr, "%d: %s should have given REG_%s\n",
232 						line, type, f2);
233 		status = 1;
234 		err = 1;	/* so we won't try regexec */
235 	}
236 
237 	if (err != 0) {
238 		regfree(&re);
239 		return;
240 	}
241 
242 	strcpy(f2copy, f2);
243 	fixstr(f2copy);
244 
245 	if (options('e', f1)&REG_STARTEND) {
246 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
247 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
248 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
249 		subs[0].rm_eo = strchr(f2, ')') - f2;
250 	}
251 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
252 
253 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
254 		/* unexpected error or wrong error */
255 		len = regerror(err, &re, erbuf, sizeof(erbuf));
256 		fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
257 		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
258 		status = 1;
259 	} else if (err != 0) {
260 		/* nothing more to check */
261 	} else if (f3 == NULL) {
262 		/* unexpected success */
263 		fprintf(stderr, "%d: %s exec should have failed\n",
264 		    line, type);
265 		status = 1;
266 		err = 1;		/* just on principle */
267 	} else if (opts&REG_NOSUB) {
268 		/* nothing more to check */
269 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
270 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
271 		status = 1;
272 		err = 1;
273 	}
274 
275 	if (err != 0 || f4 == NULL) {
276 		regfree(&re);
277 		return;
278 	}
279 
280 	for (i = 1; i < NSHOULD; i++)
281 		should[i] = NULL;
282 	nshould = split(f4, should+1, NSHOULD-1, ",");
283 	if (nshould == 0) {
284 		nshould = 1;
285 		should[1] = "";
286 	}
287 	for (i = 1; i < NSUBS; i++) {
288 		grump = check(f2, subs[i], should[i]);
289 		if (grump != NULL) {
290 			fprintf(stderr, "%d: %s $%d %s\n", line,
291 			    type, i, grump);
292 			status = 1;
293 			err = 1;
294 		}
295 	}
296 
297 	regfree(&re);
298 }
299 
300 /*
301  - options - pick options out of a regression-test string
302  - type: 'c' - compile, 'e' - exec
303  == int options(int type, char *s);
304  */
305 int
306 options(int type, char *s)
307 {
308 	char *p;
309 	int o = (type == 'c') ? copts : eopts;
310 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
311 
312 	for (p = s; *p != '\0'; p++)
313 		if (strchr(legal, *p) != NULL)
314 			switch (*p) {
315 			case 'b':
316 				o &= ~REG_EXTENDED;
317 				break;
318 			case 'i':
319 				o |= REG_ICASE;
320 				break;
321 			case 's':
322 				o |= REG_NOSUB;
323 				break;
324 			case 'n':
325 				o |= REG_NEWLINE;
326 				break;
327 			case 'm':
328 				o &= ~REG_EXTENDED;
329 				o |= REG_NOSPEC;
330 				break;
331 			case 'p':
332 				o |= REG_PEND;
333 				break;
334 			case '^':
335 				o |= REG_NOTBOL;
336 				break;
337 			case '$':
338 				o |= REG_NOTEOL;
339 				break;
340 			case '#':
341 				o |= REG_STARTEND;
342 				break;
343 			case 't':	/* trace */
344 				o |= REG_TRACE;
345 				break;
346 			case 'l':	/* force long representation */
347 				o |= REG_LARGE;
348 				break;
349 			case 'r':	/* force backref use */
350 				o |= REG_BACKR;
351 				break;
352 			}
353 	return(o);
354 }
355 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when the character c occurs in s, and 0 when it does not.
 */
int				/* predicate */
opt(int c, char *s)
{
	char *hit;

	hit = strchr(s, c);
	if (hit == NULL)
		return(0);
	return(1);
}
365 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: N becomes newline, T becomes tab,
 * S becomes space and Z becomes NUL.  A NUL written for a Z does not
 * stop the scan; it runs on to the terminator the string came with.
 * A NULL argument is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++) {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
	}
}
386 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str:    the string that was matched against
 * sub:    the offsets reported for one (sub)match
 * should: the text that was expected to match; NULL or "-" means no
 *         match was expected; "@text" means an empty match was expected
 *         at a point where the string continues with text (a bare "@"
 *         means at the end of the string)
 *
 * Returns NULL if the match is as expected, otherwise a complaint.  The
 * complaint may live in a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; both offsets are known non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match (before should is dereferenced) */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
460 
461 /*
462  - eprint - convert error number to name
463  == static char *eprint(int err);
464  */
465 static char *
466 eprint(int err)
467 {
468 	static char epbuf[100];
469 	size_t len;
470 
471 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
472 	assert(len <= sizeof(epbuf));
473 	return(epbuf);
474 }
475 
476 /*
477  - efind - convert error name to number
478  == static int efind(char *name);
479  */
480 static int
481 efind(char *name)
482 {
483 	static char efbuf[100];
484 	size_t n;
485 	regex_t re;
486 
487 	sprintf(efbuf, "REG_%s", name);
488 	assert(strlen(efbuf) < sizeof(efbuf));
489 	re.re_endp = efbuf;
490 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
491 	return(atoi(efbuf));
492 }
493