xref: /freebsd/lib/libc/regex/grot/main.c (revision 1dce0e7706dcbdd32384a78691b6500eff201b03)
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3 
4 #include <sys/types.h>
5 #include <assert.h>
6 #include <regex.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 
12 #include "main.ih"
13 
/* Program-wide state shared by main(), regress() and try(). */
char *progname;			/* argv[0], used in the usage message */
int debug = 0;			/* bumped once per -x; regress() echoes line numbers when set */
int line = 0;			/* current regression-input line, for diagnostics */
int status = 0;			/* value handed to exit(); set to 1 when a test fails */

/* Baseline flag sets that options() starts from. */
int copts = REG_EXTENDED;	/* compile-time flags */
int eopts = 0;			/* execution-time flags */

/* Offsets from -S / -E; main() uses them to build the REG_STARTEND window. */
regoff_t startoff = 0;
regoff_t endoff = 0;


/* Helpers that are not defined in this file. */
extern int split();
extern void regprint();
27 
28 /*
29  - main - do the simple case, hand off to regress() for regression
30  */
31 int
32 main(int argc, char **argv)
33 {
34 	regex_t re;
35 #	define	NS	10
36 	regmatch_t subs[NS];
37 	char erbuf[100];
38 	int err;
39 	size_t len;
40 	int c;
41 	int errflg = 0;
42 	int i;
43 	extern int optind;
44 	extern char *optarg;
45 
46 	progname = argv[0];
47 
48 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
49 		switch (c) {
50 		case 'c':	/* compile options */
51 			copts = options('c', optarg);
52 			break;
53 		case 'e':	/* execute options */
54 			eopts = options('e', optarg);
55 			break;
56 		case 'S':	/* start offset */
57 			startoff = (regoff_t)atoi(optarg);
58 			break;
59 		case 'E':	/* end offset */
60 			endoff = (regoff_t)atoi(optarg);
61 			break;
62 		case 'x':	/* Debugging. */
63 			debug++;
64 			break;
65 		case '?':
66 		default:
67 			errflg++;
68 			break;
69 		}
70 	if (errflg) {
71 		fprintf(stderr, "usage: %s ", progname);
72 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
73 		exit(2);
74 	}
75 
76 	if (optind >= argc) {
77 		regress(stdin);
78 		exit(status);
79 	}
80 
81 	err = regcomp(&re, argv[optind++], copts);
82 	if (err) {
83 		len = regerror(err, &re, erbuf, sizeof(erbuf));
84 		fprintf(stderr, "error %s, %d/%d `%s'\n",
85 			eprint(err), len, sizeof(erbuf), erbuf);
86 		exit(status);
87 	}
88 	regprint(&re, stdout);
89 
90 	if (optind >= argc) {
91 		regfree(&re);
92 		exit(status);
93 	}
94 
95 	if (eopts&REG_STARTEND) {
96 		subs[0].rm_so = startoff;
97 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
98 	}
99 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
100 	if (err) {
101 		len = regerror(err, &re, erbuf, sizeof(erbuf));
102 		fprintf(stderr, "error %s, %d/%d `%s'\n",
103 			eprint(err), len, sizeof(erbuf), erbuf);
104 		exit(status);
105 	}
106 	if (!(copts&REG_NOSUB)) {
107 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
108 		if (subs[0].rm_so != -1) {
109 			if (len != 0)
110 				printf("match `%.*s'\n", len,
111 					argv[optind] + subs[0].rm_so);
112 			else
113 				printf("match `'@%.1s\n",
114 					argv[optind] + subs[0].rm_so);
115 		}
116 		for (i = 1; i < NS; i++)
117 			if (subs[i].rm_so != -1)
118 				printf("(%d) `%.*s'\n", i,
119 					(int)(subs[i].rm_eo - subs[i].rm_so),
120 					argv[optind] + subs[i].rm_so);
121 	}
122 	exit(status);
123 }
124 
125 /*
126  - regress - main loop of regression test
127  == void regress(FILE *in);
128  */
129 void
130 regress(FILE *in)
131 {
132 	char inbuf[1000];
133 #	define	MAXF	10
134 	char *f[MAXF];
135 	int nf;
136 	int i;
137 	char erbuf[100];
138 	size_t ne;
139 	char *badpat = "invalid regular expression";
140 #	define	SHORT	10
141 	char *bpname = "REG_BADPAT";
142 	regex_t re;
143 
144 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
145 		line++;
146 		if (inbuf[0] == '#' || inbuf[0] == '\n')
147 			continue;			/* NOTE CONTINUE */
148 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
149 		if (debug)
150 			fprintf(stdout, "%d:\n", line);
151 		nf = split(inbuf, f, MAXF, "\t\t");
152 		if (nf < 3) {
153 			fprintf(stderr, "bad input, line %d\n", line);
154 			exit(1);
155 		}
156 		for (i = 0; i < nf; i++)
157 			if (strcmp(f[i], "\"\"") == 0)
158 				f[i] = "";
159 		if (nf <= 3)
160 			f[3] = NULL;
161 		if (nf <= 4)
162 			f[4] = NULL;
163 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
164 		if (opt('&', f[1]))	/* try with either type of RE */
165 			try(f[0], f[1], f[2], f[3], f[4],
166 					options('c', f[1]) &~ REG_EXTENDED);
167 	}
168 
169 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
170 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
171 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
172 							erbuf, badpat);
173 		status = 1;
174 	}
175 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
176 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
177 						ne != strlen(badpat)+1) {
178 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
179 						erbuf, SHORT-1, badpat);
180 		status = 1;
181 	}
182 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
183 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
184 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
185 						erbuf, bpname);
186 		status = 1;
187 	}
188 	re.re_endp = bpname;
189 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
190 	if (atoi(erbuf) != (int)REG_BADPAT) {
191 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
192 						erbuf, (long)REG_BADPAT);
193 		status = 1;
194 	} else if (ne != strlen(erbuf)+1) {
195 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
196 						erbuf, (long)REG_BADPAT);
197 		status = 1;
198 	}
199 }
200 
201 /*
202  - try - try it, and report on problems
203  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
204  - opts: may not match f1
205  */
206 void
207 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
208 {
209 	regex_t re;
210 #	define	NSUBS	10
211 	regmatch_t subs[NSUBS];
212 #	define	NSHOULD	15
213 	char *should[NSHOULD];
214 	int nshould;
215 	char erbuf[100];
216 	int err;
217 	int len;
218 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
219 	int i;
220 	char *grump;
221 	char f0copy[1000];
222 	char f2copy[1000];
223 
224 	strcpy(f0copy, f0);
225 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
226 	fixstr(f0copy);
227 	err = regcomp(&re, f0copy, opts);
228 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
229 		/* unexpected error or wrong error */
230 		len = regerror(err, &re, erbuf, sizeof(erbuf));
231 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
232 					line, type, eprint(err), len,
233 					sizeof(erbuf), erbuf);
234 		status = 1;
235 	} else if (err == 0 && opt('C', f1)) {
236 		/* unexpected success */
237 		fprintf(stderr, "%d: %s should have given REG_%s\n",
238 						line, type, f2);
239 		status = 1;
240 		err = 1;	/* so we won't try regexec */
241 	}
242 
243 	if (err != 0) {
244 		regfree(&re);
245 		return;
246 	}
247 
248 	strcpy(f2copy, f2);
249 	fixstr(f2copy);
250 
251 	if (options('e', f1)&REG_STARTEND) {
252 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
253 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
254 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
255 		subs[0].rm_eo = strchr(f2, ')') - f2;
256 	}
257 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
258 
259 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
260 		/* unexpected error or wrong error */
261 		len = regerror(err, &re, erbuf, sizeof(erbuf));
262 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
263 					line, type, eprint(err), len,
264 					sizeof(erbuf), erbuf);
265 		status = 1;
266 	} else if (err != 0) {
267 		/* nothing more to check */
268 	} else if (f3 == NULL) {
269 		/* unexpected success */
270 		fprintf(stderr, "%d: %s exec should have failed\n",
271 						line, type);
272 		status = 1;
273 		err = 1;		/* just on principle */
274 	} else if (opts&REG_NOSUB) {
275 		/* nothing more to check */
276 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
277 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
278 		status = 1;
279 		err = 1;
280 	}
281 
282 	if (err != 0 || f4 == NULL) {
283 		regfree(&re);
284 		return;
285 	}
286 
287 	for (i = 1; i < NSHOULD; i++)
288 		should[i] = NULL;
289 	nshould = split(f4, should+1, NSHOULD-1, ",");
290 	if (nshould == 0) {
291 		nshould = 1;
292 		should[1] = "";
293 	}
294 	for (i = 1; i < NSUBS; i++) {
295 		grump = check(f2, subs[i], should[i]);
296 		if (grump != NULL) {
297 			fprintf(stderr, "%d: %s $%d %s\n", line,
298 							type, i, grump);
299 			status = 1;
300 			err = 1;
301 		}
302 	}
303 
304 	regfree(&re);
305 }
306 
307 /*
308  - options - pick options out of a regression-test string
309  - type: 'c' - compile, 'e' - exec
310  == int options(int type, char *s);
311  */
312 int
313 options(int type, char *s)
314 {
315 	char *p;
316 	int o = (type == 'c') ? copts : eopts;
317 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
318 
319 	for (p = s; *p != '\0'; p++)
320 		if (strchr(legal, *p) != NULL)
321 			switch (*p) {
322 			case 'b':
323 				o &= ~REG_EXTENDED;
324 				break;
325 			case 'i':
326 				o |= REG_ICASE;
327 				break;
328 			case 's':
329 				o |= REG_NOSUB;
330 				break;
331 			case 'n':
332 				o |= REG_NEWLINE;
333 				break;
334 			case 'm':
335 				o &= ~REG_EXTENDED;
336 				o |= REG_NOSPEC;
337 				break;
338 			case 'p':
339 				o |= REG_PEND;
340 				break;
341 			case '^':
342 				o |= REG_NOTBOL;
343 				break;
344 			case '$':
345 				o |= REG_NOTEOL;
346 				break;
347 			case '#':
348 				o |= REG_STARTEND;
349 				break;
350 			case 't':	/* trace */
351 				o |= REG_TRACE;
352 				break;
353 			case 'l':	/* force long representation */
354 				o |= REG_LARGE;
355 				break;
356 			case 'r':	/* force backref use */
357 				o |= REG_BACKR;
358 				break;
359 			}
360 	return(o);
361 }
362 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 -
 - Returns 1 when character c is found in s by strchr(), 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
372 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 -
 - Rewrites the string in place: N becomes newline, T becomes tab,
 - S becomes space and Z becomes NUL.  A NULL pointer is accepted and
 - ignored.  The scan stops at the first NUL that was already in the
 - string; a NUL written for a Z does not stop it.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	cp = p;
	while (*cp != '\0') {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
		cp++;
	}
}
393 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 -
 - str is the string that was searched, sub the offsets reported for one
 - (sub)match, and should the expected text: NULL or "-" means no match
 - is expected, "@text" means an empty match located where `text'
 - begins (a bare "@" means at the end of str).
 - Returns NULL when the result is as expected, otherwise a complaint.
 - The complaint may live in a static buffer that the next call
 - overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match; must precede any use of should */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
					len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
467 
468 /*
469  - eprint - convert error number to name
470  == static char *eprint(int err);
471  */
472 static char *
473 eprint(int err)
474 {
475 	static char epbuf[100];
476 	size_t len;
477 
478 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
479 	assert(len <= sizeof(epbuf));
480 	return(epbuf);
481 }
482 
483 /*
484  - efind - convert error name to number
485  == static int efind(char *name);
486  */
487 static int
488 efind(char *name)
489 {
490 	static char efbuf[100];
491 	size_t n;
492 	regex_t re;
493 
494 	sprintf(efbuf, "REG_%s", name);
495 	assert(strlen(efbuf) < sizeof(efbuf));
496 	re.re_endp = efbuf;
497 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
498 	return(atoi(efbuf));
499 }
500