xref: /freebsd/lib/libc/regex/grot/main.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3 
#include <sys/types.h>

#include <assert.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
9 
10 #include "main.ih"
11 
/* Name the program was started under (argv[0]); printed in the usage text. */
char *progname;

/* Baseline flag sets that options() starts from. */
int copts = REG_EXTENDED;	/* regcomp() flags */
int eopts = 0;			/* regexec() flags */

/* Offsets main() uses to build the REG_STARTEND range. */
regoff_t startoff = 0;		/* -S: offset of the range start */
regoff_t endoff = 0;		/* -E: distance of the range end from end of string */

/* Run-time bookkeeping shared by main(), regress() and try(). */
int debug = 0;			/* incremented once per -x option */
int line = 0;			/* number of the regression-input line in progress */
int status = 0;			/* exit status; set to 1 when a check fails */


/* Helpers that are not defined in this file. */
extern void regprint();
extern int split();
25 
26 /*
27  - main - do the simple case, hand off to regress() for regression
28  */
29 main(argc, argv)
30 int argc;
31 char *argv[];
32 {
33 	regex_t re;
34 #	define	NS	10
35 	regmatch_t subs[NS];
36 	char erbuf[100];
37 	int err;
38 	size_t len;
39 	int c;
40 	int errflg = 0;
41 	int i;
42 	extern int optind;
43 	extern char *optarg;
44 
45 	progname = argv[0];
46 
47 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
48 		switch (c) {
49 		case 'c':	/* compile options */
50 			copts = options('c', optarg);
51 			break;
52 		case 'e':	/* execute options */
53 			eopts = options('e', optarg);
54 			break;
55 		case 'S':	/* start offset */
56 			startoff = (regoff_t)atoi(optarg);
57 			break;
58 		case 'E':	/* end offset */
59 			endoff = (regoff_t)atoi(optarg);
60 			break;
61 		case 'x':	/* Debugging. */
62 			debug++;
63 			break;
64 		case '?':
65 		default:
66 			errflg++;
67 			break;
68 		}
69 	if (errflg) {
70 		fprintf(stderr, "usage: %s ", progname);
71 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
72 		exit(2);
73 	}
74 
75 	if (optind >= argc) {
76 		regress(stdin);
77 		exit(status);
78 	}
79 
80 	err = regcomp(&re, argv[optind++], copts);
81 	if (err) {
82 		len = regerror(err, &re, erbuf, sizeof(erbuf));
83 		fprintf(stderr, "error %s, %d/%d `%s'\n",
84 			eprint(err), len, sizeof(erbuf), erbuf);
85 		exit(status);
86 	}
87 	regprint(&re, stdout);
88 
89 	if (optind >= argc) {
90 		regfree(&re);
91 		exit(status);
92 	}
93 
94 	if (eopts&REG_STARTEND) {
95 		subs[0].rm_so = startoff;
96 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
97 	}
98 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
99 	if (err) {
100 		len = regerror(err, &re, erbuf, sizeof(erbuf));
101 		fprintf(stderr, "error %s, %d/%d `%s'\n",
102 			eprint(err), len, sizeof(erbuf), erbuf);
103 		exit(status);
104 	}
105 	if (!(copts&REG_NOSUB)) {
106 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
107 		if (subs[0].rm_so != -1) {
108 			if (len != 0)
109 				printf("match `%.*s'\n", len,
110 					argv[optind] + subs[0].rm_so);
111 			else
112 				printf("match `'@%.1s\n",
113 					argv[optind] + subs[0].rm_so);
114 		}
115 		for (i = 1; i < NS; i++)
116 			if (subs[i].rm_so != -1)
117 				printf("(%d) `%.*s'\n", i,
118 					(int)(subs[i].rm_eo - subs[i].rm_so),
119 					argv[optind] + subs[i].rm_so);
120 	}
121 	exit(status);
122 }
123 
124 /*
125  - regress - main loop of regression test
126  == void regress(FILE *in);
127  */
128 void
129 regress(in)
130 FILE *in;
131 {
132 	char inbuf[1000];
133 #	define	MAXF	10
134 	char *f[MAXF];
135 	int nf;
136 	int i;
137 	char erbuf[100];
138 	size_t ne;
139 	char *badpat = "invalid regular expression";
140 #	define	SHORT	10
141 	char *bpname = "REG_BADPAT";
142 	regex_t re;
143 
144 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
145 		line++;
146 		if (inbuf[0] == '#' || inbuf[0] == '\n')
147 			continue;			/* NOTE CONTINUE */
148 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
149 		if (debug)
150 			fprintf(stdout, "%d:\n", line);
151 		nf = split(inbuf, f, MAXF, "\t\t");
152 		if (nf < 3) {
153 			fprintf(stderr, "bad input, line %d\n", line);
154 			exit(1);
155 		}
156 		for (i = 0; i < nf; i++)
157 			if (strcmp(f[i], "\"\"") == 0)
158 				f[i] = "";
159 		if (nf <= 3)
160 			f[3] = NULL;
161 		if (nf <= 4)
162 			f[4] = NULL;
163 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
164 		if (opt('&', f[1]))	/* try with either type of RE */
165 			try(f[0], f[1], f[2], f[3], f[4],
166 					options('c', f[1]) &~ REG_EXTENDED);
167 	}
168 
169 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
170 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
171 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
172 							erbuf, badpat);
173 		status = 1;
174 	}
175 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
176 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
177 						ne != strlen(badpat)+1) {
178 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
179 						erbuf, SHORT-1, badpat);
180 		status = 1;
181 	}
182 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
183 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
184 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
185 						erbuf, bpname);
186 		status = 1;
187 	}
188 	re.re_endp = bpname;
189 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
190 	if (atoi(erbuf) != (int)REG_BADPAT) {
191 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
192 						erbuf, (long)REG_BADPAT);
193 		status = 1;
194 	} else if (ne != strlen(erbuf)+1) {
195 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
196 						erbuf, (long)REG_BADPAT);
197 		status = 1;
198 	}
199 }
200 
201 /*
202  - try - try it, and report on problems
203  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
204  */
205 void
206 try(f0, f1, f2, f3, f4, opts)
207 char *f0;
208 char *f1;
209 char *f2;
210 char *f3;
211 char *f4;
212 int opts;			/* may not match f1 */
213 {
214 	regex_t re;
215 #	define	NSUBS	10
216 	regmatch_t subs[NSUBS];
217 #	define	NSHOULD	15
218 	char *should[NSHOULD];
219 	int nshould;
220 	char erbuf[100];
221 	int err;
222 	int len;
223 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
224 	int i;
225 	char *grump;
226 	char f0copy[1000];
227 	char f2copy[1000];
228 
229 	strcpy(f0copy, f0);
230 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
231 	fixstr(f0copy);
232 	err = regcomp(&re, f0copy, opts);
233 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
234 		/* unexpected error or wrong error */
235 		len = regerror(err, &re, erbuf, sizeof(erbuf));
236 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
237 					line, type, eprint(err), len,
238 					sizeof(erbuf), erbuf);
239 		status = 1;
240 	} else if (err == 0 && opt('C', f1)) {
241 		/* unexpected success */
242 		fprintf(stderr, "%d: %s should have given REG_%s\n",
243 						line, type, f2);
244 		status = 1;
245 		err = 1;	/* so we won't try regexec */
246 	}
247 
248 	if (err != 0) {
249 		regfree(&re);
250 		return;
251 	}
252 
253 	strcpy(f2copy, f2);
254 	fixstr(f2copy);
255 
256 	if (options('e', f1)&REG_STARTEND) {
257 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
258 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
259 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
260 		subs[0].rm_eo = strchr(f2, ')') - f2;
261 	}
262 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
263 
264 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
265 		/* unexpected error or wrong error */
266 		len = regerror(err, &re, erbuf, sizeof(erbuf));
267 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
268 					line, type, eprint(err), len,
269 					sizeof(erbuf), erbuf);
270 		status = 1;
271 	} else if (err != 0) {
272 		/* nothing more to check */
273 	} else if (f3 == NULL) {
274 		/* unexpected success */
275 		fprintf(stderr, "%d: %s exec should have failed\n",
276 						line, type);
277 		status = 1;
278 		err = 1;		/* just on principle */
279 	} else if (opts&REG_NOSUB) {
280 		/* nothing more to check */
281 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
282 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
283 		status = 1;
284 		err = 1;
285 	}
286 
287 	if (err != 0 || f4 == NULL) {
288 		regfree(&re);
289 		return;
290 	}
291 
292 	for (i = 1; i < NSHOULD; i++)
293 		should[i] = NULL;
294 	nshould = split(f4, should+1, NSHOULD-1, ",");
295 	if (nshould == 0) {
296 		nshould = 1;
297 		should[1] = "";
298 	}
299 	for (i = 1; i < NSUBS; i++) {
300 		grump = check(f2, subs[i], should[i]);
301 		if (grump != NULL) {
302 			fprintf(stderr, "%d: %s $%d %s\n", line,
303 							type, i, grump);
304 			status = 1;
305 			err = 1;
306 		}
307 	}
308 
309 	regfree(&re);
310 }
311 
312 /*
313  - options - pick options out of a regression-test string
314  == int options(int type, char *s);
315  */
316 int
317 options(type, s)
318 int type;			/* 'c' compile, 'e' exec */
319 char *s;
320 {
321 	char *p;
322 	int o = (type == 'c') ? copts : eopts;
323 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
324 
325 	for (p = s; *p != '\0'; p++)
326 		if (strchr(legal, *p) != NULL)
327 			switch (*p) {
328 			case 'b':
329 				o &= ~REG_EXTENDED;
330 				break;
331 			case 'i':
332 				o |= REG_ICASE;
333 				break;
334 			case 's':
335 				o |= REG_NOSUB;
336 				break;
337 			case 'n':
338 				o |= REG_NEWLINE;
339 				break;
340 			case 'm':
341 				o &= ~REG_EXTENDED;
342 				o |= REG_NOSPEC;
343 				break;
344 			case 'p':
345 				o |= REG_PEND;
346 				break;
347 			case '^':
348 				o |= REG_NOTBOL;
349 				break;
350 			case '$':
351 				o |= REG_NOTEOL;
352 				break;
353 			case '#':
354 				o |= REG_STARTEND;
355 				break;
356 			case 't':	/* trace */
357 				o |= REG_TRACE;
358 				break;
359 			case 'l':	/* force long representation */
360 				o |= REG_LARGE;
361 				break;
362 			case 'r':	/* force backref use */
363 				o |= REG_BACKR;
364 				break;
365 			}
366 	return(o);
367 }
368 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character `c' is found in `s' by strchr(), else 0.
 */
int				/* predicate */
opt(int c, char *s)
{
	if (strchr(s, c) == NULL)
		return(0);
	return(1);
}
380 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: N becomes newline, T becomes tab,
 * S becomes space and Z becomes NUL.  The scan only stops at the
 * string's original terminator, so characters after a Z are still
 * translated.  A NULL pointer is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	cp = p;
	while (*cp != '\0') {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
		/* step first, test afterwards: a NUL just stored is skipped */
		cp++;
	}
}
402 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str		the string that was matched against
 * sub		offsets reported for the (sub)match
 * should	expected text; NULL or "-" means no match is expected, and
 *		"@text" means an empty match located just before `text'
 *
 * Returns NULL if `sub' agrees with `should', otherwise a complaint.
 * The complaint is a string literal or a static buffer that the next
 * call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * `should' is handed to strlen()
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
								len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
479 
480 /*
481  - eprint - convert error number to name
482  == static char *eprint(int err);
483  */
484 static char *
485 eprint(err)
486 int err;
487 {
488 	static char epbuf[100];
489 	size_t len;
490 
491 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
492 	assert(len <= sizeof(epbuf));
493 	return(epbuf);
494 }
495 
496 /*
497  - efind - convert error name to number
498  == static int efind(char *name);
499  */
500 static int
501 efind(name)
502 char *name;
503 {
504 	static char efbuf[100];
505 	size_t n;
506 	regex_t re;
507 
508 	sprintf(efbuf, "REG_%s", name);
509 	assert(strlen(efbuf) < sizeof(efbuf));
510 	re.re_endp = efbuf;
511 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
512 	return(atoi(efbuf));
513 }
514