1 /* $NetBSD: main.c,v 1.2 2011/09/16 16:13:18 plunky Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <assert.h> 30 #include <regex.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 36 #include <sys/types.h> 37 38 #include "test_regex.h" 39 40 char *progname; 41 int debug = 0; 42 int line = 0; 43 int status = 0; 44 45 int copts = REG_EXTENDED; 46 int eopts = 0; 47 regoff_t startoff = 0; 48 regoff_t endoff = 0; 49 50 static char empty = '\0'; 51 52 static char *eprint(int); 53 static int efind(char *); 54 55 /* 56 * main - do the simple case, hand off to regress() for regression 57 */ 58 int 59 main(int argc, char *argv[]) 60 { 61 regex_t re; 62 # define NS 10 63 regmatch_t subs[NS]; 64 char erbuf[100]; 65 int err; 66 size_t len; 67 int c; 68 int errflg = 0; 69 int i; 70 extern int optind; 71 extern char *optarg; 72 73 progname = argv[0]; 74 75 while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1) 76 switch (c) { 77 case 'c': /* compile options */ 78 copts = options('c', optarg); 79 break; 80 case 'e': /* execute options */ 81 eopts = options('e', optarg); 82 break; 83 case 'S': /* start offset */ 84 startoff = (regoff_t)atoi(optarg); 85 break; 86 case 'E': /* end offset */ 87 endoff = (regoff_t)atoi(optarg); 88 break; 89 case 'x': /* Debugging. */ 90 debug++; 91 break; 92 case '?': 93 default: 94 errflg++; 95 break; 96 } 97 if (errflg) { 98 fprintf(stderr, "usage: %s ", progname); 99 fprintf(stderr, "[-c copt][-C][-d] [re]\n"); 100 exit(2); 101 } 102 103 if (optind >= argc) { 104 regress(stdin); 105 exit(status); 106 } 107 108 err = regcomp(&re, argv[optind++], copts); 109 if (err) { 110 len = regerror(err, &re, erbuf, sizeof(erbuf)); 111 fprintf(stderr, "error %s, %zd/%zd `%s'\n", 112 eprint(err), len, (size_t)sizeof(erbuf), erbuf); 113 exit(status); 114 } 115 regprint(&re, stdout); 116 117 if (optind >= argc) { 118 regfree(&re); 119 exit(status); 120 } 121 122 if (eopts®_STARTEND) { 123 subs[0].rm_so = startoff; 124 subs[0].rm_eo = strlen(argv[optind]) - endoff; 125 } 126 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); 127 if (err) { 128 len = regerror(err, &re, erbuf, sizeof(erbuf)); 129 fprintf(stderr, "error %s, %zd/%zd `%s'\n", 130 eprint(err), len, (size_t)sizeof(erbuf), erbuf); 131 exit(status); 132 } 133 if (!(copts®_NOSUB)) { 134 len = (int)(subs[0].rm_eo - subs[0].rm_so); 135 if (subs[0].rm_so != -1) { 136 if (len != 0) 137 printf("match `%.*s'\n", (int)len, 138 argv[optind] + subs[0].rm_so); 139 else 140 printf("match `'@%.1s\n", 141 argv[optind] + subs[0].rm_so); 142 } 143 for (i = 1; i < NS; i++) 144 if (subs[i].rm_so != -1) 145 printf("(%d) `%.*s'\n", i, 146 (int)(subs[i].rm_eo - subs[i].rm_so), 147 argv[optind] + subs[i].rm_so); 148 } 149 exit(status); 150 } 151 152 /* 153 * regress - main loop of regression test 154 */ 155 void 156 regress(FILE *in) 157 { 158 char inbuf[1000]; 159 # define MAXF 10 160 char *f[MAXF]; 161 int nf; 162 int i; 163 char erbuf[100]; 164 size_t ne; 165 const char *badpat = "invalid regular expression"; 166 # define SHORT 10 167 const char *bpname = "REG_BADPAT"; 168 regex_t re; 169 170 while (fgets(inbuf, sizeof(inbuf), in) != NULL) { 171 line++; 172 if (inbuf[0] == '#' || inbuf[0] == '\n') 173 continue; /* NOTE CONTINUE */ 174 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ 175 if (debug) 176 fprintf(stdout, "%d:\n", line); 177 nf = split(inbuf, f, MAXF, "\t\t"); 178 if (nf < 3) { 179 fprintf(stderr, "bad input, line %d\n", line); 180 exit(1); 181 } 182 for (i = 0; i < nf; i++) 183 if (strcmp(f[i], "\"\"") == 0) 184 f[i] = ∅ 185 if (nf <= 3) 186 f[3] = NULL; 187 if (nf <= 4) 188 f[4] = NULL; 189 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); 190 if (opt('&', f[1])) /* try with either type of RE */ 191 try(f[0], f[1], f[2], f[3], f[4], 192 options('c', f[1]) &~ REG_EXTENDED); 193 } 194 195 ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf)); 196 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { 197 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", 198 erbuf, badpat); 199 status = 1; 200 } 201 ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT); 202 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || 203 ne != strlen(badpat)+1) { 204 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", 205 erbuf, SHORT-1, badpat); 206 status = 1; 207 } 208 ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf)); 209 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { 210 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", 211 erbuf, bpname); 212 status = 1; 213 } 214 re.re_endp = bpname; 215 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); 216 if (atoi(erbuf) != (int)REG_BADPAT) { 217 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", 218 erbuf, (long)REG_BADPAT); 219 status = 1; 220 } else if (ne != strlen(erbuf)+1) { 221 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", 222 erbuf, (long)REG_BADPAT); 223 status = 1; 224 } 225 } 226 227 /* 228 - try - try it, and report on problems 229 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 230 */ 231 void 232 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts) 233 { 234 regex_t re; 235 # define NSUBS 10 236 regmatch_t subs[NSUBS]; 237 # define NSHOULD 15 238 char *should[NSHOULD]; 239 int nshould; 240 char erbuf[100]; 241 int err; 242 int len; 243 const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; 244 int i; 245 char *grump; 246 char f0copy[1000]; 247 char f2copy[1000]; 248 249 strcpy(f0copy, f0); 250 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; 251 fixstr(f0copy); 252 err = regcomp(&re, f0copy, opts); 253 if (err != 0 && (!opt('C', f1) || err != efind(f2))) { 254 /* unexpected error or wrong error */ 255 len = regerror(err, &re, erbuf, sizeof(erbuf)); 256 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", 257 line, type, eprint(err), len, 258 (int)sizeof(erbuf), erbuf); 259 status = 1; 260 } else if (err == 0 && opt('C', f1)) { 261 /* unexpected success */ 262 fprintf(stderr, "%d: %s should have given REG_%s\n", 263 line, type, f2); 264 status = 1; 265 err = 1; /* so we won't try regexec */ 266 } 267 268 if (err != 0) { 269 regfree(&re); 270 return; 271 } 272 273 strcpy(f2copy, f2); 274 fixstr(f2copy); 275 276 if (options('e', f1)®_STARTEND) { 277 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) 278 fprintf(stderr, "%d: bad STARTEND syntax\n", line); 279 subs[0].rm_so = strchr(f2, '(') - f2 + 1; 280 subs[0].rm_eo = strchr(f2, ')') - f2; 281 } 282 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); 283 284 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { 285 /* unexpected error or wrong error */ 286 len = regerror(err, &re, erbuf, sizeof(erbuf)); 287 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", 288 line, type, eprint(err), len, 289 (int)sizeof(erbuf), erbuf); 290 status = 1; 291 } else if (err != 0) { 292 /* nothing more to check */ 293 } else if (f3 == NULL) { 294 /* unexpected success */ 295 fprintf(stderr, "%d: %s exec should have failed\n", 296 line, type); 297 status = 1; 298 err = 1; /* just on principle */ 299 } else if (opts®_NOSUB) { 300 /* nothing more to check */ 301 } else if ((grump = check(f2, subs[0], f3)) != NULL) { 302 fprintf(stderr, "%d: %s %s\n", line, type, grump); 303 status = 1; 304 err = 1; 305 } 306 307 if (err != 0 || f4 == NULL) { 308 regfree(&re); 309 return; 310 } 311 312 for (i = 1; i < NSHOULD; i++) 313 should[i] = NULL; 314 nshould = split(f4, &should[1], NSHOULD-1, ","); 315 if (nshould == 0) { 316 nshould = 1; 317 should[1] = ∅ 318 } 319 for (i = 1; i < NSUBS; i++) { 320 grump = check(f2, subs[i], should[i]); 321 if (grump != NULL) { 322 fprintf(stderr, "%d: %s $%d %s\n", line, 323 type, i, grump); 324 status = 1; 325 err = 1; 326 } 327 } 328 329 regfree(&re); 330 } 331 332 /* 333 - options - pick options out of a regression-test string 334 == int options(int type, char *s); 335 */ 336 int 337 options(int type, char *s) 338 { 339 char *p; 340 int o = (type == 'c') ? copts : eopts; 341 const char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; 342 343 for (p = s; *p != '\0'; p++) 344 if (strchr(legal, *p) != NULL) 345 switch (*p) { 346 case 'b': 347 o &= ~REG_EXTENDED; 348 break; 349 case 'i': 350 o |= REG_ICASE; 351 break; 352 case 's': 353 o |= REG_NOSUB; 354 break; 355 case 'n': 356 o |= REG_NEWLINE; 357 break; 358 case 'm': 359 o &= ~REG_EXTENDED; 360 o |= REG_NOSPEC; 361 break; 362 case 'p': 363 o |= REG_PEND; 364 break; 365 case '^': 366 o |= REG_NOTBOL; 367 break; 368 case '$': 369 o |= REG_NOTEOL; 370 break; 371 case '#': 372 o |= REG_STARTEND; 373 break; 374 case 't': /* trace */ 375 o |= REG_TRACE; 376 break; 377 case 'l': /* force long representation */ 378 o |= REG_LARGE; 379 break; 380 case 'r': /* force backref use */ 381 o |= REG_BACKR; 382 break; 383 } 384 return(o); 385 } 386 387 /* 388 - opt - is a particular option in a regression string? 389 == int opt(int c, char *s); 390 */ 391 int /* predicate */ 392 opt(int c, char *s) 393 { 394 return(strchr(s, c) != NULL); 395 } 396 397 /* 398 - fixstr - transform magic characters in strings 399 == void fixstr(char *p); 400 */ 401 void 402 fixstr(char *p) 403 { 404 if (p == NULL) 405 return; 406 407 for (; *p != '\0'; p++) 408 if (*p == 'N') 409 *p = '\n'; 410 else if (*p == 'T') 411 *p = '\t'; 412 else if (*p == 'S') 413 *p = ' '; 414 else if (*p == 'Z') 415 *p = '\0'; 416 } 417 418 /* 419 * check - check a substring match 420 */ 421 char * /* NULL or complaint */ 422 check(char *str, regmatch_t sub, char *should) 423 { 424 int len; 425 int shlen; 426 char *p; 427 static char grump[500]; 428 char *at = NULL; 429 430 if (should != NULL && strcmp(should, "-") == 0) 431 should = NULL; 432 if (should != NULL && should[0] == '@') { 433 at = should + 1; 434 should = ∅ 435 } 436 437 /* check rm_so and rm_eo for consistency */ 438 if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || 439 (sub.rm_so != -1 && sub.rm_eo == -1) || 440 (sub.rm_so != -1 && sub.rm_so < 0) || 441 (sub.rm_eo != -1 && sub.rm_eo < 0) ) { 442 sprintf(grump, "start %ld end %ld", (long)sub.rm_so, 443 (long)sub.rm_eo); 444 return(grump); 445 } 446 447 /* check for no match */ 448 if (sub.rm_so == -1) { 449 if (should == NULL) 450 return(NULL); 451 else { 452 sprintf(grump, "did not match"); 453 return(grump); 454 } 455 } 456 457 /* check for in range */ 458 if (sub.rm_eo > (ssize_t)strlen(str)) { 459 sprintf(grump, "start %ld end %ld, past end of string", 460 (long)sub.rm_so, (long)sub.rm_eo); 461 return(grump); 462 } 463 464 len = (int)(sub.rm_eo - sub.rm_so); 465 p = str + sub.rm_so; 466 467 /* check for not supposed to match */ 468 if (should == NULL) { 469 sprintf(grump, "matched `%.*s'", len, p); 470 return(grump); 471 } 472 473 /* check for wrong match */ 474 shlen = (int)strlen(should); 475 if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { 476 sprintf(grump, "matched `%.*s' instead", len, p); 477 return(grump); 478 } 479 if (shlen > 0) 480 return(NULL); 481 482 /* check null match in right place */ 483 if (at == NULL) 484 return(NULL); 485 shlen = strlen(at); 486 if (shlen == 0) 487 shlen = 1; /* force check for end-of-string */ 488 if (strncmp(p, at, shlen) != 0) { 489 sprintf(grump, "matched null at `%.20s'", p); 490 return(grump); 491 } 492 return(NULL); 493 } 494 495 /* 496 * eprint - convert error number to name 497 */ 498 static char * 499 eprint(int err) 500 { 501 static char epbuf[100]; 502 size_t len; 503 504 len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf)); 505 assert(len <= sizeof(epbuf)); 506 return(epbuf); 507 } 508 509 /* 510 * efind - convert error name to number 511 */ 512 static int 513 efind(char *name) 514 { 515 static char efbuf[100]; 516 regex_t re; 517 518 sprintf(efbuf, "REG_%s", name); 519 assert(strlen(efbuf) < sizeof(efbuf)); 520 re.re_endp = efbuf; 521 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); 522 return(atoi(efbuf)); 523 } 524