1 #include <sys/types.h>
2 #include <assert.h>
3 #include <regex.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8
9 #include "debug.ih"
10 #include "main.ih"
11 #include "split.ih"
12
/* Program name (argv[0]); used as the prefix of the usage message. */
char *progname;
/* Debug level; incremented once per -x on the command line. */
int debug = 0;
/* Number of the regression-input line currently being processed. */
int line = 0;
/* Process exit status; set to 1 when a regression check fails. */
int status = 0;

/* Base regcomp() flags; replaced by -c and used as the starting point of options('c', ...). */
int copts = REG_EXTENDED;
/* Base regexec() flags; replaced by -e and used as the starting point of options('e', ...). */
int eopts = 0;
/* With REG_STARTEND: offset from the start of the subject string (-S). */
regoff_t startoff = 0;
/* With REG_STARTEND: number of characters to leave off the end of the subject string (-E). */
regoff_t endoff = 0;
22
23
24 /*
25 - main - do the simple case, hand off to regress() for regression
26 */
27 int
main(int argc,char ** argv)28 main(int argc, char **argv)
29 {
30 regex_t re;
31 # define NS 10
32 regmatch_t subs[NS];
33 char erbuf[100];
34 int err;
35 size_t len;
36 int c;
37 int errflg = 0;
38 int i;
39 extern int optind;
40 extern char *optarg;
41
42 progname = argv[0];
43
44 while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
45 switch (c) {
46 case 'c': /* compile options */
47 copts = options('c', optarg);
48 break;
49 case 'e': /* execute options */
50 eopts = options('e', optarg);
51 break;
52 case 'S': /* start offset */
53 startoff = (regoff_t)atoi(optarg);
54 break;
55 case 'E': /* end offset */
56 endoff = (regoff_t)atoi(optarg);
57 break;
58 case 'x': /* Debugging. */
59 debug++;
60 break;
61 case '?':
62 default:
63 errflg++;
64 break;
65 }
66 if (errflg) {
67 fprintf(stderr, "usage: %s ", progname);
68 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
69 exit(2);
70 }
71
72 if (optind >= argc) {
73 regress(stdin);
74 exit(status);
75 }
76
77 err = regcomp(&re, argv[optind++], copts);
78 if (err) {
79 len = regerror(err, &re, erbuf, sizeof(erbuf));
80 fprintf(stderr, "error %s, %zu/%zu `%s'\n",
81 eprint(err), len, sizeof(erbuf), erbuf);
82 exit(status);
83 }
84 regprint(&re, stdout);
85
86 if (optind >= argc) {
87 regfree(&re);
88 exit(status);
89 }
90
91 if ((eopts & REG_STARTEND) != 0) {
92 subs[0].rm_so = startoff;
93 subs[0].rm_eo = strlen(argv[optind]) - endoff;
94 }
95 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
96 if (err) {
97 len = regerror(err, &re, erbuf, sizeof(erbuf));
98 fprintf(stderr, "error %s, %zu/%zu `%s'\n",
99 eprint(err), len, sizeof(erbuf), erbuf);
100 exit(status);
101 }
102 if ((copts & REG_NOSUB) == 0) {
103 len = (int)(subs[0].rm_eo - subs[0].rm_so);
104 if (subs[0].rm_so != -1) {
105 if (len != 0)
106 printf("match `%.*s'\n", (int)len,
107 argv[optind] + subs[0].rm_so);
108 else
109 printf("match `'@%.1s\n",
110 argv[optind] + subs[0].rm_so);
111 }
112 for (i = 1; i < NS; i++)
113 if (subs[i].rm_so != -1)
114 printf("(%d) `%.*s'\n", i,
115 (int)(subs[i].rm_eo - subs[i].rm_so),
116 argv[optind] + subs[i].rm_so);
117 }
118 exit(status);
119 }
120
121 /*
122 - regress - main loop of regression test
123 == void regress(FILE *in);
124 */
125 void
regress(FILE * in)126 regress(FILE *in)
127 {
128 char inbuf[1000];
129 # define MAXF 10
130 char *f[MAXF];
131 int nf;
132 int i;
133 char erbuf[100];
134 size_t ne;
135 char *badpat = "invalid regular expression";
136 # define SHORT 10
137 char *bpname = "REG_BADPAT";
138 regex_t re;
139
140 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
141 line++;
142 if (inbuf[0] == '#' || inbuf[0] == '\n')
143 continue; /* NOTE CONTINUE */
144 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
145 if (debug)
146 fprintf(stdout, "%d:\n", line);
147 nf = split(inbuf, f, MAXF, "\t\t");
148 if (nf < 3) {
149 fprintf(stderr, "bad input, line %d\n", line);
150 exit(1);
151 }
152 for (i = 0; i < nf; i++)
153 if (strcmp(f[i], "\"\"") == 0)
154 f[i] = "";
155 if (nf <= 3)
156 f[3] = NULL;
157 if (nf <= 4)
158 f[4] = NULL;
159 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
160 if (opt('&', f[1])) /* try with either type of RE */
161 try(f[0], f[1], f[2], f[3], f[4],
162 options('c', f[1]) &~ REG_EXTENDED);
163 }
164
165 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
166 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
167 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
168 erbuf, badpat);
169 status = 1;
170 }
171 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
172 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
173 ne != strlen(badpat)+1) {
174 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
175 erbuf, SHORT-1, badpat);
176 status = 1;
177 }
178 ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
179 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
180 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
181 erbuf, bpname);
182 status = 1;
183 }
184 re.re_endp = bpname;
185 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
186 if (atoi(erbuf) != (int)REG_BADPAT) {
187 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
188 erbuf, (long)REG_BADPAT);
189 status = 1;
190 } else if (ne != strlen(erbuf) + 1) {
191 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
192 erbuf, (long)REG_BADPAT);
193 status = 1;
194 }
195 }
196
197 /*
198 - try - try it, and report on problems
199 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
200 - opts: may not match f1
201 */
202 void
try(char * f0,char * f1,char * f2,char * f3,char * f4,int opts)203 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
204 {
205 regex_t re;
206 # define NSUBS 10
207 regmatch_t subs[NSUBS];
208 # define NSHOULD 15
209 char *should[NSHOULD];
210 char erbuf[100];
211 size_t len;
212 int err, i, nshould;
213 char *grump;
214 char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
215 char f0copy[1000];
216 char f2copy[1000];
217
218 strcpy(f0copy, f0);
219 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
220 fixstr(f0copy);
221 err = regcomp(&re, f0copy, opts);
222 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
223 /* unexpected error or wrong error */
224 len = regerror(err, &re, erbuf, sizeof(erbuf));
225 fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
226 line, type, eprint(err), len, sizeof(erbuf), erbuf);
227 status = 1;
228 } else if (err == 0 && opt('C', f1)) {
229 /* unexpected success */
230 fprintf(stderr, "%d: %s should have given REG_%s\n",
231 line, type, f2);
232 status = 1;
233 err = 1; /* so we won't try regexec */
234 }
235
236 if (err != 0) {
237 regfree(&re);
238 return;
239 }
240
241 strcpy(f2copy, f2);
242 fixstr(f2copy);
243
244 if (options('e', f1)®_STARTEND) {
245 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
246 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
247 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
248 subs[0].rm_eo = strchr(f2, ')') - f2;
249 }
250 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
251
252 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
253 /* unexpected error or wrong error */
254 len = regerror(err, &re, erbuf, sizeof(erbuf));
255 fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
256 line, type, eprint(err), len, sizeof(erbuf), erbuf);
257 status = 1;
258 } else if (err != 0) {
259 /* nothing more to check */
260 } else if (f3 == NULL) {
261 /* unexpected success */
262 fprintf(stderr, "%d: %s exec should have failed\n",
263 line, type);
264 status = 1;
265 err = 1; /* just on principle */
266 } else if (opts®_NOSUB) {
267 /* nothing more to check */
268 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
269 fprintf(stderr, "%d: %s %s\n", line, type, grump);
270 status = 1;
271 err = 1;
272 }
273
274 if (err != 0 || f4 == NULL) {
275 regfree(&re);
276 return;
277 }
278
279 for (i = 1; i < NSHOULD; i++)
280 should[i] = NULL;
281 nshould = split(f4, should+1, NSHOULD-1, ",");
282 if (nshould == 0) {
283 nshould = 1;
284 should[1] = "";
285 }
286 for (i = 1; i < NSUBS; i++) {
287 grump = check(f2, subs[i], should[i]);
288 if (grump != NULL) {
289 fprintf(stderr, "%d: %s $%d %s\n", line,
290 type, i, grump);
291 status = 1;
292 err = 1;
293 }
294 }
295
296 regfree(&re);
297 }
298
299 /*
300 - options - pick options out of a regression-test string
301 - type: 'c' - compile, 'e' - exec
302 == int options(int type, char *s);
303 */
304 int
options(int type,char * s)305 options(int type, char *s)
306 {
307 char *p;
308 int o = (type == 'c') ? copts : eopts;
309 char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
310
311 for (p = s; *p != '\0'; p++)
312 if (strchr(legal, *p) != NULL)
313 switch (*p) {
314 case 'b':
315 o &= ~REG_EXTENDED;
316 break;
317 case 'i':
318 o |= REG_ICASE;
319 break;
320 case 's':
321 o |= REG_NOSUB;
322 break;
323 case 'n':
324 o |= REG_NEWLINE;
325 break;
326 case 'm':
327 o &= ~REG_EXTENDED;
328 o |= REG_NOSPEC;
329 break;
330 case 'p':
331 o |= REG_PEND;
332 break;
333 case '^':
334 o |= REG_NOTBOL;
335 break;
336 case '$':
337 o |= REG_NOTEOL;
338 break;
339 case '#':
340 o |= REG_STARTEND;
341 break;
342 case 't': /* trace */
343 o |= REG_TRACE;
344 break;
345 case 'l': /* force long representation */
346 o |= REG_LARGE;
347 break;
348 case 'r': /* force backref use */
349 o |= REG_BACKR;
350 break;
351 }
352 return(o);
353 }
354
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in s, 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	return(hit != NULL);
}
364
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes a NUL byte.  A NULL argument is
 * ignored.  The scan is driven by the original terminator, so it carries
 * on past a NUL that it has just written for a 'Z'.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++) {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
	}
}
385
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match described by sub (offsets into str) with the expected
 * text.  `should` may be:
 *   NULL or "-"  no match is expected;
 *   "@text"      an empty match is expected, located where str continues
 *                with text ("@" alone means at the end of str);
 *   other        the exact text that should have matched.
 *
 * Returns NULL if everything is as expected, otherwise a complaint.  The
 * complaint is either a string literal or a static buffer that is
 * overwritten by the next call.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * strlen(should), which must never see a NULL pointer
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead", len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;		/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
459
460 /*
461 - eprint - convert error number to name
462 == static char *eprint(int err);
463 */
464 static char *
eprint(int err)465 eprint(int err)
466 {
467 static char epbuf[100];
468 size_t len;
469
470 len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
471 assert(len <= sizeof(epbuf));
472 return(epbuf);
473 }
474
475 /*
476 - efind - convert error name to number
477 == static int efind(char *name);
478 */
479 static int
efind(char * name)480 efind(char *name)
481 {
482 static char efbuf[100];
483 size_t n;
484 regex_t re;
485
486 sprintf(efbuf, "REG_%s", name);
487 assert(strlen(efbuf) < sizeof(efbuf));
488 re.re_endp = efbuf;
489 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
490 return(atoi(efbuf));
491 }
492