1 /*- 2 * Copyright (c) 1992 Keith Muller. 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Keith Muller of the University of California, San Diego. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; 41 #endif 42 static const char rcsid[] = 43 "$FreeBSD$"; 44 #endif /* not lint */ 45 46 #include <sys/types.h> 47 #include <sys/stat.h> 48 #include <stdio.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <stdlib.h> 52 #include <errno.h> 53 #ifdef NET2_REGEX 54 #include <regexp.h> 55 #else 56 #include <regex.h> 57 #endif 58 #include "pax.h" 59 #include "pat_rep.h" 60 #include "extern.h" 61 62 /* 63 * routines to handle pattern matching, name modification (regular expression 64 * substitution and interactive renames), and destination name modification for 65 * copy (-rw). Both file name and link names are adjusted as required in these 66 * routines. 67 */ 68 69 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ 70 static PATTERN *pathead = NULL; /* file pattern match list head */ 71 static PATTERN *pattail = NULL; /* file pattern match list tail */ 72 static REPLACE *rephead = NULL; /* replacement string list head */ 73 static REPLACE *reptail = NULL; /* replacement string list tail */ 74 75 static int rep_name(char *, int *, int); 76 static int tty_rename(ARCHD *); 77 static int fix_path(char *, int *, char *, int); 78 static int fn_match(char *, char *, char **); 79 static char * range_match(char *, int); 80 #ifdef NET2_REGEX 81 static int resub(regexp *, char *, char *, char *); 82 #else 83 static int resub(regex_t *, regmatch_t *, char *, char *, char *); 84 #endif 85 86 /* 87 * rep_add() 88 * parses the -s replacement string; compiles the regular expression 89 * and stores the compiled value and it's replacement string together in 90 * replacement string list. Input to this function is of the form: 91 * /old/new/pg 92 * The first char in the string specifies the delimiter used by this 93 * replacement string. "Old" is a regular expression in "ed" format which 94 * is compiled by regcomp() and is applied to filenames. "new" is the 95 * substitution string; p and g are options flags for printing and global 96 * replacement (over the single filename) 97 * Return: 98 * 0 if a proper replacement string and regular expression was added to 99 * the list of replacement patterns; -1 otherwise. 100 */ 101 102 int 103 rep_add(char *str) 104 { 105 char *pt1; 106 char *pt2; 107 REPLACE *rep; 108 # ifndef NET2_REGEX 109 int res; 110 char rebuf[BUFSIZ]; 111 # endif 112 113 /* 114 * throw out the bad parameters 115 */ 116 if ((str == NULL) || (*str == '\0')) { 117 paxwarn(1, "Empty replacement string"); 118 return(-1); 119 } 120 121 /* 122 * first character in the string specifies what the delimiter is for 123 * this expression 124 */ 125 if ((pt1 = strchr(str+1, *str)) == NULL) { 126 paxwarn(1, "Invalid replacement string %s", str); 127 return(-1); 128 } 129 130 /* 131 * allocate space for the node that handles this replacement pattern 132 * and split out the regular expression and try to compile it 133 */ 134 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { 135 paxwarn(1, "Unable to allocate memory for replacement string"); 136 return(-1); 137 } 138 139 *pt1 = '\0'; 140 # ifdef NET2_REGEX 141 if ((rep->rcmp = regcomp(str+1)) == NULL) { 142 # else 143 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { 144 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); 145 paxwarn(1, "%s while compiling regular expression %s", rebuf, str); 146 # endif 147 (void)free((char *)rep); 148 return(-1); 149 } 150 151 /* 152 * put the delimiter back in case we need an error message and 153 * locate the delimiter at the end of the replacement string 154 * we then point the node at the new substitution string 155 */ 156 *pt1++ = *str; 157 if ((pt2 = strchr(pt1, *str)) == NULL) { 158 # ifdef NET2_REGEX 159 (void)free((char *)rep->rcmp); 160 # else 161 regfree(&(rep->rcmp)); 162 # endif 163 (void)free((char *)rep); 164 paxwarn(1, "Invalid replacement string %s", str); 165 return(-1); 166 } 167 168 *pt2 = '\0'; 169 rep->nstr = pt1; 170 pt1 = pt2++; 171 rep->flgs = 0; 172 173 /* 174 * set the options if any 175 */ 176 while (*pt2 != '\0') { 177 switch(*pt2) { 178 case 'g': 179 case 'G': 180 rep->flgs |= GLOB; 181 break; 182 case 'p': 183 case 'P': 184 rep->flgs |= PRNT; 185 break; 186 default: 187 # ifdef NET2_REGEX 188 (void)free((char *)rep->rcmp); 189 # else 190 regfree(&(rep->rcmp)); 191 # endif 192 (void)free((char *)rep); 193 *pt1 = *str; 194 paxwarn(1, "Invalid replacement string option %s", str); 195 return(-1); 196 } 197 ++pt2; 198 } 199 200 /* 201 * all done, link it in at the end 202 */ 203 rep->fow = NULL; 204 if (rephead == NULL) { 205 reptail = rephead = rep; 206 return(0); 207 } 208 reptail->fow = rep; 209 reptail = rep; 210 return(0); 211 } 212 213 /* 214 * pat_add() 215 * add a pattern match to the pattern match list. Pattern matches are used 216 * to select which archive members are extracted. (They appear as 217 * arguments to pax in the list and read modes). If no patterns are 218 * supplied to pax, all members in the archive will be selected (and the 219 * pattern match list is empty). 220 * Return: 221 * 0 if the pattern was added to the list, -1 otherwise 222 */ 223 224 int 225 pat_add(char *str, char *chdname) 226 { 227 PATTERN *pt; 228 229 /* 230 * throw out the junk 231 */ 232 if ((str == NULL) || (*str == '\0')) { 233 paxwarn(1, "Empty pattern string"); 234 return(-1); 235 } 236 237 /* 238 * allocate space for the pattern and store the pattern. the pattern is 239 * part of argv so do not bother to copy it, just point at it. Add the 240 * node to the end of the pattern list 241 */ 242 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { 243 paxwarn(1, "Unable to allocate memory for pattern string"); 244 return(-1); 245 } 246 247 pt->pstr = str; 248 pt->pend = NULL; 249 pt->plen = strlen(str); 250 pt->fow = NULL; 251 pt->flgs = 0; 252 pt->chdname = chdname; 253 254 if (pathead == NULL) { 255 pattail = pathead = pt; 256 return(0); 257 } 258 pattail->fow = pt; 259 pattail = pt; 260 return(0); 261 } 262 263 /* 264 * pat_chk() 265 * complain if any the user supplied pattern did not result in a match to 266 * a selected archive member. 267 */ 268 269 void 270 pat_chk(void) 271 { 272 PATTERN *pt; 273 int wban = 0; 274 275 /* 276 * walk down the list checking the flags to make sure MTCH was set, 277 * if not complain 278 */ 279 for (pt = pathead; pt != NULL; pt = pt->fow) { 280 if (pt->flgs & MTCH) 281 continue; 282 if (!wban) { 283 paxwarn(1, "WARNING! These patterns were not matched:"); 284 ++wban; 285 } 286 (void)fprintf(stderr, "%s\n", pt->pstr); 287 } 288 } 289 290 /* 291 * pat_sel() 292 * the archive member which matches a pattern was selected. Mark the 293 * pattern as having selected an archive member. arcn->pat points at the 294 * pattern that was matched. arcn->pat is set in pat_match() 295 * 296 * NOTE: When the -c option is used, we are called when there was no match 297 * by pat_match() (that means we did match before the inverted sense of 298 * the logic). Now this seems really strange at first, but with -c we 299 * need to keep track of those patterns that cause a archive member to NOT 300 * be selected (it found an archive member with a specified pattern) 301 * Return: 302 * 0 if the pattern pointed at by arcn->pat was tagged as creating a 303 * match, -1 otherwise. 304 */ 305 306 int 307 pat_sel(ARCHD *arcn) 308 { 309 PATTERN *pt; 310 PATTERN **ppt; 311 int len; 312 313 /* 314 * if no patterns just return 315 */ 316 if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) 317 return(0); 318 319 /* 320 * when we are NOT limited to a single match per pattern mark the 321 * pattern and return 322 */ 323 if (!nflag) { 324 pt->flgs |= MTCH; 325 return(0); 326 } 327 328 /* 329 * we reach this point only when we allow a single selected match per 330 * pattern, if the pattern matches a directory and we do not have -d 331 * (dflag) we are done with this pattern. We may also be handed a file 332 * in the subtree of a directory. in that case when we are operating 333 * with -d, this pattern was already selected and we are done 334 */ 335 if (pt->flgs & DIR_MTCH) 336 return(0); 337 338 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { 339 /* 340 * ok we matched a directory and we are allowing 341 * subtree matches but because of the -n only its children will 342 * match. This is tagged as a DIR_MTCH type. 343 * WATCH IT, the code assumes that pt->pend points 344 * into arcn->name and arcn->name has not been modified. 345 * If not we will have a big mess. Yup this is another kludge 346 */ 347 348 /* 349 * if this was a prefix match, remove trailing part of path 350 * so we can copy it. Future matches will be exact prefix match 351 */ 352 if (pt->pend != NULL) 353 *pt->pend = '\0'; 354 355 if ((pt->pstr = strdup(arcn->name)) == NULL) { 356 paxwarn(1, "Pattern select out of memory"); 357 if (pt->pend != NULL) 358 *pt->pend = '/'; 359 pt->pend = NULL; 360 return(-1); 361 } 362 363 /* 364 * put the trailing / back in the source string 365 */ 366 if (pt->pend != NULL) { 367 *pt->pend = '/'; 368 pt->pend = NULL; 369 } 370 pt->plen = strlen(pt->pstr); 371 372 /* 373 * strip off any trailing /, this should really never happen 374 */ 375 len = pt->plen - 1; 376 if (*(pt->pstr + len) == '/') { 377 *(pt->pstr + len) = '\0'; 378 pt->plen = len; 379 } 380 pt->flgs = DIR_MTCH | MTCH; 381 arcn->pat = pt; 382 return(0); 383 } 384 385 /* 386 * we are then done with this pattern, so we delete it from the list 387 * because it can never be used for another match. 388 * Seems kind of strange to do for a -c, but the pax spec is really 389 * vague on the interaction of -c -n and -d. We assume that when -c 390 * and the pattern rejects a member (i.e. it matched it) it is done. 391 * In effect we place the order of the flags as having -c last. 392 */ 393 pt = pathead; 394 ppt = &pathead; 395 while ((pt != NULL) && (pt != arcn->pat)) { 396 ppt = &(pt->fow); 397 pt = pt->fow; 398 } 399 400 if (pt == NULL) { 401 /* 402 * should never happen.... 403 */ 404 paxwarn(1, "Pattern list inconsistant"); 405 return(-1); 406 } 407 *ppt = pt->fow; 408 (void)free((char *)pt); 409 arcn->pat = NULL; 410 return(0); 411 } 412 413 /* 414 * pat_match() 415 * see if this archive member matches any supplied pattern, if a match 416 * is found, arcn->pat is set to point at the potential pattern. Later if 417 * this archive member is "selected" we process and mark the pattern as 418 * one which matched a selected archive member (see pat_sel()) 419 * Return: 420 * 0 if this archive member should be processed, 1 if it should be 421 * skipped and -1 if we are done with all patterns (and pax should quit 422 * looking for more members) 423 */ 424 425 int 426 pat_match(ARCHD *arcn) 427 { 428 PATTERN *pt; 429 430 arcn->pat = NULL; 431 432 /* 433 * if there are no more patterns and we have -n (and not -c) we are 434 * done. otherwise with no patterns to match, matches all 435 */ 436 if (pathead == NULL) { 437 if (nflag && !cflag) 438 return(-1); 439 return(0); 440 } 441 442 /* 443 * have to search down the list one at a time looking for a match. 444 */ 445 pt = pathead; 446 while (pt != NULL) { 447 /* 448 * check for a file name match unless we have DIR_MTCH set in 449 * this pattern then we want a prefix match 450 */ 451 if (pt->flgs & DIR_MTCH) { 452 /* 453 * this pattern was matched before to a directory 454 * as we must have -n set for this (but not -d). We can 455 * only match CHILDREN of that directory so we must use 456 * an exact prefix match (no wildcards). 457 */ 458 if ((arcn->name[pt->plen] == '/') && 459 (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) 460 break; 461 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) 462 break; 463 pt = pt->fow; 464 } 465 466 /* 467 * return the result, remember that cflag (-c) inverts the sense of a 468 * match 469 */ 470 if (pt == NULL) 471 return(cflag ? 0 : 1); 472 473 /* 474 * we had a match, now when we invert the sense (-c) we reject this 475 * member. However we have to tag the pattern a being successful, (in a 476 * match, not in selecting a archive member) so we call pat_sel() here. 477 */ 478 arcn->pat = pt; 479 if (!cflag) 480 return(0); 481 482 if (pat_sel(arcn) < 0) 483 return(-1); 484 arcn->pat = NULL; 485 return(1); 486 } 487 488 /* 489 * fn_match() 490 * Return: 491 * 0 if this archive member should be processed, 1 if it should be 492 * skipped and -1 if we are done with all patterns (and pax should quit 493 * looking for more members) 494 * Note: *pend may be changed to show where the prefix ends. 495 */ 496 497 static int 498 fn_match(char *pattern, char *string, char **pend) 499 { 500 char c; 501 char test; 502 503 *pend = NULL; 504 for (;;) { 505 switch (c = *pattern++) { 506 case '\0': 507 /* 508 * Ok we found an exact match 509 */ 510 if (*string == '\0') 511 return(0); 512 513 /* 514 * Check if it is a prefix match 515 */ 516 if ((dflag == 1) || (*string != '/')) 517 return(-1); 518 519 /* 520 * It is a prefix match, remember where the trailing 521 * / is located 522 */ 523 *pend = string; 524 return(0); 525 case '?': 526 if ((test = *string++) == '\0') 527 return (-1); 528 break; 529 case '*': 530 c = *pattern; 531 /* 532 * Collapse multiple *'s. 533 */ 534 while (c == '*') 535 c = *++pattern; 536 537 /* 538 * Optimized hack for pattern with a * at the end 539 */ 540 if (c == '\0') 541 return (0); 542 543 /* 544 * General case, use recursion. 545 */ 546 while ((test = *string) != '\0') { 547 if (!fn_match(pattern, string, pend)) 548 return (0); 549 ++string; 550 } 551 return (-1); 552 case '[': 553 /* 554 * range match 555 */ 556 if (((test = *string++) == '\0') || 557 ((pattern = range_match(pattern, test)) == NULL)) 558 return (-1); 559 break; 560 case '\\': 561 default: 562 if (c != *string++) 563 return (-1); 564 break; 565 } 566 } 567 /* NOTREACHED */ 568 } 569 570 static char * 571 range_match(char *pattern, int test) 572 { 573 char c; 574 char c2; 575 int negate; 576 int ok = 0; 577 578 if ((negate = (*pattern == '!')) != 0) 579 ++pattern; 580 581 while ((c = *pattern++) != ']') { 582 /* 583 * Illegal pattern 584 */ 585 if (c == '\0') 586 return (NULL); 587 588 if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && 589 (c2 != ']')) { 590 if ((c <= test) && (test <= c2)) 591 ok = 1; 592 pattern += 2; 593 } else if (c == test) 594 ok = 1; 595 } 596 return (ok == negate ? NULL : pattern); 597 } 598 599 /* 600 * mod_name() 601 * modify a selected file name. first attempt to apply replacement string 602 * expressions, then apply interactive file rename. We apply replacement 603 * string expressions to both filenames and file links (if we didn't the 604 * links would point to the wrong place, and we could never be able to 605 * move an archive that has a file link in it). When we rename files 606 * interactively, we store that mapping (old name to user input name) so 607 * if we spot any file links to the old file name in the future, we will 608 * know exactly how to fix the file link. 609 * Return: 610 * 0 continue to process file, 1 skip this file, -1 pax is finished 611 */ 612 613 int 614 mod_name(ARCHD *arcn) 615 { 616 int res = 0; 617 618 /* 619 * Strip off leading '/' if appropriate. 620 * Currently, this option is only set for the tar format. 621 */ 622 if (rmleadslash && arcn->name[0] == '/') { 623 if (arcn->name[1] == '\0') { 624 arcn->name[0] = '.'; 625 } else { 626 (void)memmove(arcn->name, &arcn->name[1], 627 strlen(arcn->name)); 628 arcn->nlen--; 629 } 630 if (rmleadslash < 2) { 631 rmleadslash = 2; 632 paxwarn(0, "Removing leading / from absolute path names in the archive"); 633 } 634 } 635 if (rmleadslash && arcn->ln_name[0] == '/' && 636 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { 637 if (arcn->ln_name[1] == '\0') { 638 arcn->ln_name[0] = '.'; 639 } else { 640 (void)memmove(arcn->ln_name, &arcn->ln_name[1], 641 strlen(arcn->ln_name)); 642 arcn->ln_nlen--; 643 } 644 if (rmleadslash < 2) { 645 rmleadslash = 2; 646 paxwarn(0, "Removing leading / from absolute path names in the archive"); 647 } 648 } 649 650 /* 651 * IMPORTANT: We have a problem. what do we do with symlinks? 652 * Modifying a hard link name makes sense, as we know the file it 653 * points at should have been seen already in the archive (and if it 654 * wasn't seen because of a read error or a bad archive, we lose 655 * anyway). But there are no such requirements for symlinks. On one 656 * hand the symlink that refers to a file in the archive will have to 657 * be modified to so it will still work at its new location in the 658 * file system. On the other hand a symlink that points elsewhere (and 659 * should continue to do so) should not be modified. There is clearly 660 * no perfect solution here. So we handle them like hardlinks. Clearly 661 * a replacement made by the interactive rename mapping is very likely 662 * to be correct since it applies to a single file and is an exact 663 * match. The regular expression replacements are a little harder to 664 * justify though. We claim that the symlink name is only likely 665 * to be replaced when it points within the file tree being moved and 666 * in that case it should be modified. what we really need to do is to 667 * call an oracle here. :) 668 */ 669 if (rephead != NULL) { 670 /* 671 * we have replacement strings, modify the name and the link 672 * name if any. 673 */ 674 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) 675 return(res); 676 677 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 678 (arcn->type == PAX_HRG)) && 679 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) 680 return(res); 681 } 682 683 if (iflag) { 684 /* 685 * perform interactive file rename, then map the link if any 686 */ 687 if ((res = tty_rename(arcn)) != 0) 688 return(res); 689 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 690 (arcn->type == PAX_HRG)) 691 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); 692 } 693 return(res); 694 } 695 696 /* 697 * tty_rename() 698 * Prompt the user for a replacement file name. A "." keeps the old name, 699 * a empty line skips the file, and an EOF on reading the tty, will cause 700 * pax to stop processing and exit. Otherwise the file name input, replaces 701 * the old one. 702 * Return: 703 * 0 process this file, 1 skip this file, -1 we need to exit pax 704 */ 705 706 static int 707 tty_rename(ARCHD *arcn) 708 { 709 char tmpname[PAXPATHLEN+2]; 710 int res; 711 712 /* 713 * prompt user for the replacement name for a file, keep trying until 714 * we get some reasonable input. Archives may have more than one file 715 * on them with the same name (from updates etc). We print verbose info 716 * on the file so the user knows what is up. 717 */ 718 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); 719 720 for (;;) { 721 ls_tty(arcn); 722 tty_prnt("Input new name, or a \".\" to keep the old name, "); 723 tty_prnt("or a \"return\" to skip this file.\n"); 724 tty_prnt("Input > "); 725 if (tty_read(tmpname, sizeof(tmpname)) < 0) 726 return(-1); 727 if (strcmp(tmpname, "..") == 0) { 728 tty_prnt("Try again, illegal file name: ..\n"); 729 continue; 730 } 731 if (strlen(tmpname) > PAXPATHLEN) { 732 tty_prnt("Try again, file name too long\n"); 733 continue; 734 } 735 break; 736 } 737 738 /* 739 * empty file name, skips this file. a "." leaves it alone 740 */ 741 if (tmpname[0] == '\0') { 742 tty_prnt("Skipping file.\n"); 743 return(1); 744 } 745 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { 746 tty_prnt("Processing continues, name unchanged.\n"); 747 return(0); 748 } 749 750 /* 751 * ok the name changed. We may run into links that point at this 752 * file later. we have to remember where the user sent the file 753 * in order to repair any links. 754 */ 755 tty_prnt("Processing continues, name changed to: %s\n", tmpname); 756 res = add_name(arcn->name, arcn->nlen, tmpname); 757 arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1); 758 arcn->name[arcn->nlen] = '\0'; 759 if (res < 0) 760 return(-1); 761 return(0); 762 } 763 764 /* 765 * set_dest() 766 * fix up the file name and the link name (if any) so this file will land 767 * in the destination directory (used during copy() -rw). 768 * Return: 769 * 0 if ok, -1 if failure (name too long) 770 */ 771 772 int 773 set_dest(ARCHD *arcn, char *dest_dir, int dir_len) 774 { 775 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) 776 return(-1); 777 778 /* 779 * It is really hard to deal with symlinks here, we cannot be sure 780 * if the name they point was moved (or will be moved). It is best to 781 * leave them alone. 782 */ 783 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) 784 return(0); 785 786 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) 787 return(-1); 788 return(0); 789 } 790 791 /* 792 * fix_path 793 * concatenate dir_name and or_name and store the result in or_name (if 794 * it fits). This is one ugly function. 795 * Return: 796 * 0 if ok, -1 if the final name is too long 797 */ 798 799 static int 800 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) 801 { 802 char *src; 803 char *dest; 804 char *start; 805 int len; 806 807 /* 808 * we shift the or_name to the right enough to tack in the dir_name 809 * at the front. We make sure we have enough space for it all before 810 * we start. since dest always ends in a slash, we skip of or_name 811 * if it also starts with one. 812 */ 813 start = or_name; 814 src = start + *or_len; 815 dest = src + dir_len; 816 if (*start == '/') { 817 ++start; 818 --dest; 819 } 820 if ((len = dest - or_name) > PAXPATHLEN) { 821 paxwarn(1, "File name %s/%s, too long", dir_name, start); 822 return(-1); 823 } 824 *or_len = len; 825 826 /* 827 * enough space, shift 828 */ 829 while (src >= start) 830 *dest-- = *src--; 831 src = dir_name + dir_len - 1; 832 833 /* 834 * splice in the destination directory name 835 */ 836 while (src >= dir_name) 837 *dest-- = *src--; 838 839 *(or_name + len) = '\0'; 840 return(0); 841 } 842 843 /* 844 * rep_name() 845 * walk down the list of replacement strings applying each one in order. 846 * when we find one with a successful substitution, we modify the name 847 * as specified. if required, we print the results. if the resulting name 848 * is empty, we will skip this archive member. We use the regexp(3) 849 * routines (regexp() ought to win a prize as having the most cryptic 850 * library function manual page). 851 * --Parameters-- 852 * name is the file name we are going to apply the regular expressions to 853 * (and may be modified) 854 * nlen is the length of this name (and is modified to hold the length of 855 * the final string). 856 * prnt is a flag that says whether to print the final result. 857 * Return: 858 * 0 if substitution was successful, 1 if we are to skip the file (the name 859 * ended up empty) 860 */ 861 862 static int 863 rep_name(char *name, int *nlen, int prnt) 864 { 865 REPLACE *pt; 866 char *inpt; 867 char *outpt; 868 char *endpt; 869 char *rpt; 870 int found = 0; 871 int res; 872 # ifndef NET2_REGEX 873 regmatch_t pm[MAXSUBEXP]; 874 # endif 875 char nname[PAXPATHLEN+1]; /* final result of all replacements */ 876 char buf1[PAXPATHLEN+1]; /* where we work on the name */ 877 878 /* 879 * copy the name into buf1, where we will work on it. We need to keep 880 * the orig string around so we can print out the result of the final 881 * replacement. We build up the final result in nname. inpt points at 882 * the string we apply the regular expression to. prnt is used to 883 * suppress printing when we handle replacements on the link field 884 * (the user already saw that substitution go by) 885 */ 886 pt = rephead; 887 (void)strcpy(buf1, name); 888 inpt = buf1; 889 outpt = nname; 890 endpt = outpt + PAXPATHLEN; 891 892 /* 893 * try each replacement string in order 894 */ 895 while (pt != NULL) { 896 do { 897 /* 898 * check for a successful substitution, if not go to 899 * the next pattern, or cleanup if we were global 900 */ 901 # ifdef NET2_REGEX 902 if (regexec(pt->rcmp, inpt) == 0) 903 # else 904 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) 905 # endif 906 break; 907 908 /* 909 * ok we found one. We have three parts, the prefix 910 * which did not match, the section that did and the 911 * tail (that also did not match). Copy the prefix to 912 * the final output buffer (watching to make sure we 913 * do not create a string too long). 914 */ 915 found = 1; 916 # ifdef NET2_REGEX 917 rpt = pt->rcmp->startp[0]; 918 # else 919 rpt = inpt + pm[0].rm_so; 920 # endif 921 922 while ((inpt < rpt) && (outpt < endpt)) 923 *outpt++ = *inpt++; 924 if (outpt == endpt) 925 break; 926 927 /* 928 * for the second part (which matched the regular 929 * expression) apply the substitution using the 930 * replacement string and place it the prefix in the 931 * final output. If we have problems, skip it. 932 */ 933 # ifdef NET2_REGEX 934 if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) { 935 # else 936 if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) 937 < 0) { 938 # endif 939 if (prnt) 940 paxwarn(1, "Replacement name error %s", 941 name); 942 return(1); 943 } 944 outpt += res; 945 946 /* 947 * we set up to look again starting at the first 948 * character in the tail (of the input string right 949 * after the last character matched by the regular 950 * expression (inpt always points at the first char in 951 * the string to process). If we are not doing a global 952 * substitution, we will use inpt to copy the tail to 953 * the final result. Make sure we do not overrun the 954 * output buffer 955 */ 956 # ifdef NET2_REGEX 957 inpt = pt->rcmp->endp[0]; 958 # else 959 inpt += pm[0].rm_eo - pm[0].rm_so; 960 # endif 961 962 if ((outpt == endpt) || (*inpt == '\0')) 963 break; 964 965 /* 966 * if the user wants global we keep trying to 967 * substitute until it fails, then we are done. 968 */ 969 } while (pt->flgs & GLOB); 970 971 if (found) 972 break; 973 974 /* 975 * a successful substitution did NOT occur, try the next one 976 */ 977 pt = pt->fow; 978 } 979 980 if (found) { 981 /* 982 * we had a substitution, copy the last tail piece (if there is 983 * room) to the final result 984 */ 985 while ((outpt < endpt) && (*inpt != '\0')) 986 *outpt++ = *inpt++; 987 988 *outpt = '\0'; 989 if ((outpt == endpt) && (*inpt != '\0')) { 990 if (prnt) 991 paxwarn(1,"Replacement name too long %s >> %s", 992 name, nname); 993 return(1); 994 } 995 996 /* 997 * inform the user of the result if wanted 998 */ 999 if (prnt && (pt->flgs & PRNT)) { 1000 if (*nname == '\0') 1001 (void)fprintf(stderr,"%s >> <empty string>\n", 1002 name); 1003 else 1004 (void)fprintf(stderr,"%s >> %s\n", name, nname); 1005 } 1006 1007 /* 1008 * if empty inform the caller this file is to be skipped 1009 * otherwise copy the new name over the orig name and return 1010 */ 1011 if (*nname == '\0') 1012 return(1); 1013 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); 1014 name[PAXPATHLEN] = '\0'; 1015 } 1016 return(0); 1017 } 1018 1019 #ifdef NET2_REGEX 1020 /* 1021 * resub() 1022 * apply the replacement to the matched expression. expand out the old 1023 * style ed(1) subexpression expansion. 1024 * Return: 1025 * -1 if error, or the number of characters added to the destination. 1026 */ 1027 1028 static int 1029 resub(regexp *prog, char *src, char *dest, char *destend) 1030 { 1031 char *spt; 1032 char *dpt; 1033 char c; 1034 int no; 1035 int len; 1036 1037 spt = src; 1038 dpt = dest; 1039 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1040 if (c == '&') 1041 no = 0; 1042 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) 1043 no = *spt++ - '0'; 1044 else { 1045 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1046 c = *spt++; 1047 *dpt++ = c; 1048 continue; 1049 } 1050 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || 1051 ((len = prog->endp[no] - prog->startp[no]) <= 0)) 1052 continue; 1053 1054 /* 1055 * copy the subexpression to the destination. 1056 * fail if we run out of space or the match string is damaged 1057 */ 1058 if (len > (destend - dpt)) 1059 len = destend - dpt; 1060 if (l_strncpy(dpt, prog->startp[no], len) != len) 1061 return(-1); 1062 dpt += len; 1063 } 1064 return(dpt - dest); 1065 } 1066 1067 #else 1068 1069 /* 1070 * resub() 1071 * apply the replacement to the matched expression. expand out the old 1072 * style ed(1) subexpression expansion. 1073 * Return: 1074 * -1 if error, or the number of characters added to the destination. 1075 */ 1076 1077 static int 1078 resub(regex_t *rp, regmatch_t *pm, char *src, char *dest, 1079 char *destend) 1080 { 1081 char *spt; 1082 char *dpt; 1083 char c; 1084 regmatch_t *pmpt; 1085 int len; 1086 int subexcnt; 1087 1088 spt = src; 1089 dpt = dest; 1090 subexcnt = rp->re_nsub; 1091 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1092 /* 1093 * see if we just have an ordinary replacement character 1094 * or we refer to a subexpression. 1095 */ 1096 if (c == '&') { 1097 pmpt = pm; 1098 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { 1099 /* 1100 * make sure there is a subexpression as specified 1101 */ 1102 if ((len = *spt++ - '0') > subexcnt) 1103 return(-1); 1104 pmpt = pm + len; 1105 } else { 1106 /* 1107 * Ordinary character, just copy it 1108 */ 1109 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1110 c = *spt++; 1111 *dpt++ = c; 1112 continue; 1113 } 1114 1115 /* 1116 * continue if the subexpression is bogus 1117 */ 1118 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || 1119 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) 1120 continue; 1121 1122 /* 1123 * copy the subexpression to the destination. 1124 * fail if we run out of space or the match string is damaged 1125 */ 1126 if (len > (destend - dpt)) 1127 len = destend - dpt; 1128 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) 1129 return(-1); 1130 dpt += len; 1131 } 1132 return(dpt - dest); 1133 } 1134 #endif 1135