1 #include <sys/types.h> 2 #include <assert.h> 3 #include <regex.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <unistd.h> 8 9 #include "debug.ih" 10 #include "main.ih" 11 #include "split.ih" 12 13 char *progname; 14 int debug = 0; 15 int line = 0; 16 int status = 0; 17 18 int copts = REG_EXTENDED; 19 int eopts = 0; 20 regoff_t startoff = 0; 21 regoff_t endoff = 0; 22 23 24 /* 25 - main - do the simple case, hand off to regress() for regression 26 */ 27 int 28 main(int argc, char **argv) 29 { 30 regex_t re; 31 # define NS 10 32 regmatch_t subs[NS]; 33 char erbuf[100]; 34 int err; 35 size_t len; 36 int c; 37 int errflg = 0; 38 int i; 39 extern int optind; 40 extern char *optarg; 41 42 progname = argv[0]; 43 44 while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1) 45 switch (c) { 46 case 'c': /* compile options */ 47 copts = options('c', optarg); 48 break; 49 case 'e': /* execute options */ 50 eopts = options('e', optarg); 51 break; 52 case 'S': /* start offset */ 53 startoff = (regoff_t)atoi(optarg); 54 break; 55 case 'E': /* end offset */ 56 endoff = (regoff_t)atoi(optarg); 57 break; 58 case 'x': /* Debugging. */ 59 debug++; 60 break; 61 case '?': 62 default: 63 errflg++; 64 break; 65 } 66 if (errflg) { 67 fprintf(stderr, "usage: %s ", progname); 68 fprintf(stderr, "[-c copt][-C][-d] [re]\n"); 69 exit(2); 70 } 71 72 if (optind >= argc) { 73 regress(stdin); 74 exit(status); 75 } 76 77 err = regcomp(&re, argv[optind++], copts); 78 if (err) { 79 len = regerror(err, &re, erbuf, sizeof(erbuf)); 80 fprintf(stderr, "error %s, %zu/%zu `%s'\n", 81 eprint(err), len, sizeof(erbuf), erbuf); 82 exit(status); 83 } 84 regprint(&re, stdout); 85 86 if (optind >= argc) { 87 regfree(&re); 88 exit(status); 89 } 90 91 if ((eopts & REG_STARTEND) != 0) { 92 subs[0].rm_so = startoff; 93 subs[0].rm_eo = strlen(argv[optind]) - endoff; 94 } 95 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); 96 if (err) { 97 len = regerror(err, &re, erbuf, sizeof(erbuf)); 98 fprintf(stderr, "error %s, %zu/%zu `%s'\n", 99 eprint(err), len, sizeof(erbuf), erbuf); 100 exit(status); 101 } 102 if ((copts & REG_NOSUB) == 0) { 103 len = (int)(subs[0].rm_eo - subs[0].rm_so); 104 if (subs[0].rm_so != -1) { 105 if (len != 0) 106 printf("match `%.*s'\n", (int)len, 107 argv[optind] + subs[0].rm_so); 108 else 109 printf("match `'@%.1s\n", 110 argv[optind] + subs[0].rm_so); 111 } 112 for (i = 1; i < NS; i++) 113 if (subs[i].rm_so != -1) 114 printf("(%d) `%.*s'\n", i, 115 (int)(subs[i].rm_eo - subs[i].rm_so), 116 argv[optind] + subs[i].rm_so); 117 } 118 exit(status); 119 } 120 121 /* 122 - regress - main loop of regression test 123 == void regress(FILE *in); 124 */ 125 void 126 regress(FILE *in) 127 { 128 char inbuf[1000]; 129 # define MAXF 10 130 char *f[MAXF]; 131 int nf; 132 int i; 133 char erbuf[100]; 134 size_t ne; 135 char *badpat = "invalid regular expression"; 136 # define SHORT 10 137 char *bpname = "REG_BADPAT"; 138 regex_t re; 139 140 while (fgets(inbuf, sizeof(inbuf), in) != NULL) { 141 line++; 142 if (inbuf[0] == '#' || inbuf[0] == '\n') 143 continue; /* NOTE CONTINUE */ 144 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ 145 if (debug) 146 fprintf(stdout, "%d:\n", line); 147 nf = split(inbuf, f, MAXF, "\t\t"); 148 if (nf < 3) { 149 fprintf(stderr, "bad input, line %d\n", line); 150 exit(1); 151 } 152 for (i = 0; i < nf; i++) 153 if (strcmp(f[i], "\"\"") == 0) 154 f[i] = ""; 155 if (nf <= 3) 156 f[3] = NULL; 157 if (nf <= 4) 158 f[4] = NULL; 159 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); 160 if (opt('&', f[1])) /* try with either type of RE */ 161 try(f[0], f[1], f[2], f[3], f[4], 162 options('c', f[1]) &~ REG_EXTENDED); 163 } 164 165 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 166 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { 167 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", 168 erbuf, badpat); 169 status = 1; 170 } 171 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); 172 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || 173 ne != strlen(badpat)+1) { 174 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", 175 erbuf, SHORT-1, badpat); 176 status = 1; 177 } 178 ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 179 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) { 180 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", 181 erbuf, bpname); 182 status = 1; 183 } 184 re.re_endp = bpname; 185 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); 186 if (atoi(erbuf) != (int)REG_BADPAT) { 187 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", 188 erbuf, (long)REG_BADPAT); 189 status = 1; 190 } else if (ne != strlen(erbuf) + 1) { 191 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", 192 erbuf, (long)REG_BADPAT); 193 status = 1; 194 } 195 } 196 197 /* 198 - try - try it, and report on problems 199 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 200 - opts: may not match f1 201 */ 202 void 203 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts) 204 { 205 regex_t re; 206 # define NSUBS 10 207 regmatch_t subs[NSUBS]; 208 # define NSHOULD 15 209 char *should[NSHOULD]; 210 char erbuf[100]; 211 size_t len; 212 int err, i, nshould; 213 char *grump; 214 char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; 215 char f0copy[1000]; 216 char f2copy[1000]; 217 218 strcpy(f0copy, f0); 219 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; 220 fixstr(f0copy); 221 err = regcomp(&re, f0copy, opts); 222 if (err != 0 && (!opt('C', f1) || err != efind(f2))) { 223 /* unexpected error or wrong error */ 224 len = regerror(err, &re, erbuf, sizeof(erbuf)); 225 fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n", 226 line, type, eprint(err), len, sizeof(erbuf), erbuf); 227 status = 1; 228 } else if (err == 0 && opt('C', f1)) { 229 /* unexpected success */ 230 fprintf(stderr, "%d: %s should have given REG_%s\n", 231 line, type, f2); 232 status = 1; 233 err = 1; /* so we won't try regexec */ 234 } 235 236 if (err != 0) { 237 regfree(&re); 238 return; 239 } 240 241 strcpy(f2copy, f2); 242 fixstr(f2copy); 243 244 if (options('e', f1)®_STARTEND) { 245 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) 246 fprintf(stderr, "%d: bad STARTEND syntax\n", line); 247 subs[0].rm_so = strchr(f2, '(') - f2 + 1; 248 subs[0].rm_eo = strchr(f2, ')') - f2; 249 } 250 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); 251 252 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { 253 /* unexpected error or wrong error */ 254 len = regerror(err, &re, erbuf, sizeof(erbuf)); 255 fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n", 256 line, type, eprint(err), len, sizeof(erbuf), erbuf); 257 status = 1; 258 } else if (err != 0) { 259 /* nothing more to check */ 260 } else if (f3 == NULL) { 261 /* unexpected success */ 262 fprintf(stderr, "%d: %s exec should have failed\n", 263 line, type); 264 status = 1; 265 err = 1; /* just on principle */ 266 } else if (opts®_NOSUB) { 267 /* nothing more to check */ 268 } else if ((grump = check(f2, subs[0], f3)) != NULL) { 269 fprintf(stderr, "%d: %s %s\n", line, type, grump); 270 status = 1; 271 err = 1; 272 } 273 274 if (err != 0 || f4 == NULL) { 275 regfree(&re); 276 return; 277 } 278 279 for (i = 1; i < NSHOULD; i++) 280 should[i] = NULL; 281 nshould = split(f4, should+1, NSHOULD-1, ","); 282 if (nshould == 0) { 283 nshould = 1; 284 should[1] = ""; 285 } 286 for (i = 1; i < NSUBS; i++) { 287 grump = check(f2, subs[i], should[i]); 288 if (grump != NULL) { 289 fprintf(stderr, "%d: %s $%d %s\n", line, 290 type, i, grump); 291 status = 1; 292 err = 1; 293 } 294 } 295 296 regfree(&re); 297 } 298 299 /* 300 - options - pick options out of a regression-test string 301 - type: 'c' - compile, 'e' - exec 302 == int options(int type, char *s); 303 */ 304 int 305 options(int type, char *s) 306 { 307 char *p; 308 int o = (type == 'c') ? copts : eopts; 309 char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; 310 311 for (p = s; *p != '\0'; p++) 312 if (strchr(legal, *p) != NULL) 313 switch (*p) { 314 case 'b': 315 o &= ~REG_EXTENDED; 316 break; 317 case 'i': 318 o |= REG_ICASE; 319 break; 320 case 's': 321 o |= REG_NOSUB; 322 break; 323 case 'n': 324 o |= REG_NEWLINE; 325 break; 326 case 'm': 327 o &= ~REG_EXTENDED; 328 o |= REG_NOSPEC; 329 break; 330 case 'p': 331 o |= REG_PEND; 332 break; 333 case '^': 334 o |= REG_NOTBOL; 335 break; 336 case '$': 337 o |= REG_NOTEOL; 338 break; 339 case '#': 340 o |= REG_STARTEND; 341 break; 342 case 't': /* trace */ 343 o |= REG_TRACE; 344 break; 345 case 'l': /* force long representation */ 346 o |= REG_LARGE; 347 break; 348 case 'r': /* force backref use */ 349 o |= REG_BACKR; 350 break; 351 } 352 return(o); 353 } 354 355 /* 356 - opt - is a particular option in a regression string? 357 == int opt(int c, char *s); 358 */ 359 int /* predicate */ 360 opt(int c, char *s) 361 { 362 return(strchr(s, c) != NULL); 363 } 364 365 /* 366 - fixstr - transform magic characters in strings 367 == void fixstr(char *p); 368 */ 369 void 370 fixstr(char *p) 371 { 372 if (p == NULL) 373 return; 374 375 for (; *p != '\0'; p++) 376 if (*p == 'N') 377 *p = '\n'; 378 else if (*p == 'T') 379 *p = '\t'; 380 else if (*p == 'S') 381 *p = ' '; 382 else if (*p == 'Z') 383 *p = '\0'; 384 } 385 386 /* 387 - check - check a substring match 388 == char *check(char *str, regmatch_t sub, char *should); 389 */ 390 char * /* NULL or complaint */ 391 check(char *str, regmatch_t sub, char *should) 392 { 393 int len; 394 int shlen; 395 char *p; 396 static char grump[500]; 397 char *at = NULL; 398 399 if (should != NULL && strcmp(should, "-") == 0) 400 should = NULL; 401 if (should != NULL && should[0] == '@') { 402 at = should + 1; 403 should = ""; 404 } 405 406 /* check rm_so and rm_eo for consistency */ 407 if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || 408 (sub.rm_so != -1 && sub.rm_eo == -1) || 409 (sub.rm_so != -1 && sub.rm_so < 0) || 410 (sub.rm_eo != -1 && sub.rm_eo < 0) ) { 411 sprintf(grump, "start %ld end %ld", (long)sub.rm_so, 412 (long)sub.rm_eo); 413 return(grump); 414 } 415 416 /* check for no match */ 417 if (sub.rm_so == -1 && should == NULL) 418 return(NULL); 419 if (sub.rm_so == -1) 420 return("did not match"); 421 422 /* check for in range */ 423 if (sub.rm_eo > strlen(str)) { 424 sprintf(grump, "start %ld end %ld, past end of string", 425 (long)sub.rm_so, (long)sub.rm_eo); 426 return(grump); 427 } 428 429 len = (int)(sub.rm_eo - sub.rm_so); 430 shlen = (int)strlen(should); 431 p = str + sub.rm_so; 432 433 /* check for not supposed to match */ 434 if (should == NULL) { 435 sprintf(grump, "matched `%.*s'", len, p); 436 return(grump); 437 } 438 439 /* check for wrong match */ 440 if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { 441 sprintf(grump, "matched `%.*s' instead", len, p); 442 return(grump); 443 } 444 if (shlen > 0) 445 return(NULL); 446 447 /* check null match in right place */ 448 if (at == NULL) 449 return(NULL); 450 shlen = strlen(at); 451 if (shlen == 0) 452 shlen = 1; /* force check for end-of-string */ 453 if (strncmp(p, at, shlen) != 0) { 454 sprintf(grump, "matched null at `%.20s'", p); 455 return(grump); 456 } 457 return(NULL); 458 } 459 460 /* 461 - eprint - convert error number to name 462 == static char *eprint(int err); 463 */ 464 static char * 465 eprint(int err) 466 { 467 static char epbuf[100]; 468 size_t len; 469 470 len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); 471 assert(len <= sizeof(epbuf)); 472 return(epbuf); 473 } 474 475 /* 476 - efind - convert error name to number 477 == static int efind(char *name); 478 */ 479 static int 480 efind(char *name) 481 { 482 static char efbuf[100]; 483 size_t n; 484 regex_t re; 485 486 sprintf(efbuf, "REG_%s", name); 487 assert(strlen(efbuf) < sizeof(efbuf)); 488 re.re_endp = efbuf; 489 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); 490 return(atoi(efbuf)); 491 } 492