1 /*- 2 * Copyright (c) 1992 Keith Muller. 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Keith Muller of the University of California, San Diego. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; 41 #endif 42 static const char rcsid[] = 43 "$FreeBSD$"; 44 #endif /* not lint */ 45 46 #include <sys/types.h> 47 #include <sys/stat.h> 48 #include <stdio.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <stdlib.h> 52 #include <errno.h> 53 #ifdef NET2_REGEX 54 #include <regexp.h> 55 #else 56 #include <regex.h> 57 #endif 58 #include "pax.h" 59 #include "pat_rep.h" 60 #include "extern.h" 61 62 /* 63 * routines to handle pattern matching, name modification (regular expression 64 * substitution and interactive renames), and destination name modification for 65 * copy (-rw). Both file name and link names are adjusted as required in these 66 * routines. 67 */ 68 69 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ 70 static PATTERN *pathead = NULL; /* file pattern match list head */ 71 static PATTERN *pattail = NULL; /* file pattern match list tail */ 72 static REPLACE *rephead = NULL; /* replacement string list head */ 73 static REPLACE *reptail = NULL; /* replacement string list tail */ 74 75 static int rep_name __P((char *, int *, int)); 76 static int tty_rename __P((register ARCHD *)); 77 static int fix_path __P((char *, int *, char *, int)); 78 static int fn_match __P((register char *, register char *, char **)); 79 static char * range_match __P((register char *, register int)); 80 #ifdef NET2_REGEX 81 static int resub __P((regexp *, char *, char *, register char *)); 82 #else 83 static int resub __P((regex_t *, regmatch_t *, char *, char *, char *)); 84 #endif 85 86 /* 87 * rep_add() 88 * parses the -s replacement string; compiles the regular expression 89 * and stores the compiled value and it's replacement string together in 90 * replacement string list. Input to this function is of the form: 91 * /old/new/pg 92 * The first char in the string specifies the delimiter used by this 93 * replacement string. "Old" is a regular expression in "ed" format which 94 * is compiled by regcomp() and is applied to filenames. "new" is the 95 * substitution string; p and g are options flags for printing and global 96 * replacement (over the single filename) 97 * Return: 98 * 0 if a proper replacement string and regular expression was added to 99 * the list of replacement patterns; -1 otherwise. 100 */ 101 102 #ifdef __STDC__ 103 int 104 rep_add(register char *str) 105 #else 106 int 107 rep_add(str) 108 register char *str; 109 #endif 110 { 111 register char *pt1; 112 register char *pt2; 113 register REPLACE *rep; 114 # ifndef NET2_REGEX 115 register int res; 116 char rebuf[BUFSIZ]; 117 # endif 118 119 /* 120 * throw out the bad parameters 121 */ 122 if ((str == NULL) || (*str == '\0')) { 123 paxwarn(1, "Empty replacement string"); 124 return(-1); 125 } 126 127 /* 128 * first character in the string specifies what the delimiter is for 129 * this expression 130 */ 131 if ((pt1 = strchr(str+1, *str)) == NULL) { 132 paxwarn(1, "Invalid replacement string %s", str); 133 return(-1); 134 } 135 136 /* 137 * allocate space for the node that handles this replacement pattern 138 * and split out the regular expression and try to compile it 139 */ 140 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { 141 paxwarn(1, "Unable to allocate memory for replacement string"); 142 return(-1); 143 } 144 145 *pt1 = '\0'; 146 # ifdef NET2_REGEX 147 if ((rep->rcmp = regcomp(str+1)) == NULL) { 148 # else 149 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { 150 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); 151 paxwarn(1, "%s while compiling regular expression %s", rebuf, str); 152 # endif 153 (void)free((char *)rep); 154 return(-1); 155 } 156 157 /* 158 * put the delimiter back in case we need an error message and 159 * locate the delimiter at the end of the replacement string 160 * we then point the node at the new substitution string 161 */ 162 *pt1++ = *str; 163 if ((pt2 = strchr(pt1, *str)) == NULL) { 164 # ifdef NET2_REGEX 165 (void)free((char *)rep->rcmp); 166 # else 167 regfree(&(rep->rcmp)); 168 # endif 169 (void)free((char *)rep); 170 paxwarn(1, "Invalid replacement string %s", str); 171 return(-1); 172 } 173 174 *pt2 = '\0'; 175 rep->nstr = pt1; 176 pt1 = pt2++; 177 rep->flgs = 0; 178 179 /* 180 * set the options if any 181 */ 182 while (*pt2 != '\0') { 183 switch(*pt2) { 184 case 'g': 185 case 'G': 186 rep->flgs |= GLOB; 187 break; 188 case 'p': 189 case 'P': 190 rep->flgs |= PRNT; 191 break; 192 default: 193 # ifdef NET2_REGEX 194 (void)free((char *)rep->rcmp); 195 # else 196 regfree(&(rep->rcmp)); 197 # endif 198 (void)free((char *)rep); 199 *pt1 = *str; 200 paxwarn(1, "Invalid replacement string option %s", str); 201 return(-1); 202 } 203 ++pt2; 204 } 205 206 /* 207 * all done, link it in at the end 208 */ 209 rep->fow = NULL; 210 if (rephead == NULL) { 211 reptail = rephead = rep; 212 return(0); 213 } 214 reptail->fow = rep; 215 reptail = rep; 216 return(0); 217 } 218 219 /* 220 * pat_add() 221 * add a pattern match to the pattern match list. Pattern matches are used 222 * to select which archive members are extracted. (They appear as 223 * arguments to pax in the list and read modes). If no patterns are 224 * supplied to pax, all members in the archive will be selected (and the 225 * pattern match list is empty). 226 * Return: 227 * 0 if the pattern was added to the list, -1 otherwise 228 */ 229 230 #ifdef __STDC__ 231 int 232 pat_add(char *str, char *chdname) 233 #else 234 int 235 pat_add(str, chdname) 236 char *str; 237 char *chdname; 238 #endif 239 { 240 register PATTERN *pt; 241 242 /* 243 * throw out the junk 244 */ 245 if ((str == NULL) || (*str == '\0')) { 246 paxwarn(1, "Empty pattern string"); 247 return(-1); 248 } 249 250 /* 251 * allocate space for the pattern and store the pattern. the pattern is 252 * part of argv so do not bother to copy it, just point at it. Add the 253 * node to the end of the pattern list 254 */ 255 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { 256 paxwarn(1, "Unable to allocate memory for pattern string"); 257 return(-1); 258 } 259 260 pt->pstr = str; 261 pt->pend = NULL; 262 pt->plen = strlen(str); 263 pt->fow = NULL; 264 pt->flgs = 0; 265 pt->chdname = chdname; 266 267 if (pathead == NULL) { 268 pattail = pathead = pt; 269 return(0); 270 } 271 pattail->fow = pt; 272 pattail = pt; 273 return(0); 274 } 275 276 /* 277 * pat_chk() 278 * complain if any the user supplied pattern did not result in a match to 279 * a selected archive member. 280 */ 281 282 #ifdef __STDC__ 283 void 284 pat_chk(void) 285 #else 286 void 287 pat_chk() 288 #endif 289 { 290 register PATTERN *pt; 291 register int wban = 0; 292 293 /* 294 * walk down the list checking the flags to make sure MTCH was set, 295 * if not complain 296 */ 297 for (pt = pathead; pt != NULL; pt = pt->fow) { 298 if (pt->flgs & MTCH) 299 continue; 300 if (!wban) { 301 paxwarn(1, "WARNING! These patterns were not matched:"); 302 ++wban; 303 } 304 (void)fprintf(stderr, "%s\n", pt->pstr); 305 } 306 } 307 308 /* 309 * pat_sel() 310 * the archive member which matches a pattern was selected. Mark the 311 * pattern as having selected an archive member. arcn->pat points at the 312 * pattern that was matched. arcn->pat is set in pat_match() 313 * 314 * NOTE: When the -c option is used, we are called when there was no match 315 * by pat_match() (that means we did match before the inverted sense of 316 * the logic). Now this seems really strange at first, but with -c we 317 * need to keep track of those patterns that cause a archive member to NOT 318 * be selected (it found an archive member with a specified pattern) 319 * Return: 320 * 0 if the pattern pointed at by arcn->pat was tagged as creating a 321 * match, -1 otherwise. 322 */ 323 324 #ifdef __STDC__ 325 int 326 pat_sel(register ARCHD *arcn) 327 #else 328 int 329 pat_sel(arcn) 330 register ARCHD *arcn; 331 #endif 332 { 333 register PATTERN *pt; 334 register PATTERN **ppt; 335 register int len; 336 337 /* 338 * if no patterns just return 339 */ 340 if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) 341 return(0); 342 343 /* 344 * when we are NOT limited to a single match per pattern mark the 345 * pattern and return 346 */ 347 if (!nflag) { 348 pt->flgs |= MTCH; 349 return(0); 350 } 351 352 /* 353 * we reach this point only when we allow a single selected match per 354 * pattern, if the pattern matches a directory and we do not have -d 355 * (dflag) we are done with this pattern. We may also be handed a file 356 * in the subtree of a directory. in that case when we are operating 357 * with -d, this pattern was already selected and we are done 358 */ 359 if (pt->flgs & DIR_MTCH) 360 return(0); 361 362 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { 363 /* 364 * ok we matched a directory and we are allowing 365 * subtree matches but because of the -n only its children will 366 * match. This is tagged as a DIR_MTCH type. 367 * WATCH IT, the code assumes that pt->pend points 368 * into arcn->name and arcn->name has not been modified. 369 * If not we will have a big mess. Yup this is another kludge 370 */ 371 372 /* 373 * if this was a prefix match, remove trailing part of path 374 * so we can copy it. Future matches will be exact prefix match 375 */ 376 if (pt->pend != NULL) 377 *pt->pend = '\0'; 378 379 if ((pt->pstr = strdup(arcn->name)) == NULL) { 380 paxwarn(1, "Pattern select out of memory"); 381 if (pt->pend != NULL) 382 *pt->pend = '/'; 383 pt->pend = NULL; 384 return(-1); 385 } 386 387 /* 388 * put the trailing / back in the source string 389 */ 390 if (pt->pend != NULL) { 391 *pt->pend = '/'; 392 pt->pend = NULL; 393 } 394 pt->plen = strlen(pt->pstr); 395 396 /* 397 * strip off any trailing /, this should really never happen 398 */ 399 len = pt->plen - 1; 400 if (*(pt->pstr + len) == '/') { 401 *(pt->pstr + len) = '\0'; 402 pt->plen = len; 403 } 404 pt->flgs = DIR_MTCH | MTCH; 405 arcn->pat = pt; 406 return(0); 407 } 408 409 /* 410 * we are then done with this pattern, so we delete it from the list 411 * because it can never be used for another match. 412 * Seems kind of strange to do for a -c, but the pax spec is really 413 * vague on the interaction of -c -n and -d. We assume that when -c 414 * and the pattern rejects a member (i.e. it matched it) it is done. 415 * In effect we place the order of the flags as having -c last. 416 */ 417 pt = pathead; 418 ppt = &pathead; 419 while ((pt != NULL) && (pt != arcn->pat)) { 420 ppt = &(pt->fow); 421 pt = pt->fow; 422 } 423 424 if (pt == NULL) { 425 /* 426 * should never happen.... 427 */ 428 paxwarn(1, "Pattern list inconsistant"); 429 return(-1); 430 } 431 *ppt = pt->fow; 432 (void)free((char *)pt); 433 arcn->pat = NULL; 434 return(0); 435 } 436 437 /* 438 * pat_match() 439 * see if this archive member matches any supplied pattern, if a match 440 * is found, arcn->pat is set to point at the potential pattern. Later if 441 * this archive member is "selected" we process and mark the pattern as 442 * one which matched a selected archive member (see pat_sel()) 443 * Return: 444 * 0 if this archive member should be processed, 1 if it should be 445 * skipped and -1 if we are done with all patterns (and pax should quit 446 * looking for more members) 447 */ 448 449 #ifdef __STDC__ 450 int 451 pat_match(register ARCHD *arcn) 452 #else 453 int 454 pat_match(arcn) 455 register ARCHD *arcn; 456 #endif 457 { 458 register PATTERN *pt; 459 460 arcn->pat = NULL; 461 462 /* 463 * if there are no more patterns and we have -n (and not -c) we are 464 * done. otherwise with no patterns to match, matches all 465 */ 466 if (pathead == NULL) { 467 if (nflag && !cflag) 468 return(-1); 469 return(0); 470 } 471 472 /* 473 * have to search down the list one at a time looking for a match. 474 */ 475 pt = pathead; 476 while (pt != NULL) { 477 /* 478 * check for a file name match unless we have DIR_MTCH set in 479 * this pattern then we want a prefix match 480 */ 481 if (pt->flgs & DIR_MTCH) { 482 /* 483 * this pattern was matched before to a directory 484 * as we must have -n set for this (but not -d). We can 485 * only match CHILDREN of that directory so we must use 486 * an exact prefix match (no wildcards). 487 */ 488 if ((arcn->name[pt->plen] == '/') && 489 (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) 490 break; 491 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) 492 break; 493 pt = pt->fow; 494 } 495 496 /* 497 * return the result, remember that cflag (-c) inverts the sense of a 498 * match 499 */ 500 if (pt == NULL) 501 return(cflag ? 0 : 1); 502 503 /* 504 * we had a match, now when we invert the sense (-c) we reject this 505 * member. However we have to tag the pattern a being successful, (in a 506 * match, not in selecting a archive member) so we call pat_sel() here. 507 */ 508 arcn->pat = pt; 509 if (!cflag) 510 return(0); 511 512 if (pat_sel(arcn) < 0) 513 return(-1); 514 arcn->pat = NULL; 515 return(1); 516 } 517 518 /* 519 * fn_match() 520 * Return: 521 * 0 if this archive member should be processed, 1 if it should be 522 * skipped and -1 if we are done with all patterns (and pax should quit 523 * looking for more members) 524 * Note: *pend may be changed to show where the prefix ends. 525 */ 526 527 #ifdef __STDC__ 528 static int 529 fn_match(register char *pattern, register char *string, char **pend) 530 #else 531 static int 532 fn_match(pattern, string, pend) 533 register char *pattern; 534 register char *string; 535 char **pend; 536 #endif 537 { 538 register char c; 539 char test; 540 541 *pend = NULL; 542 for (;;) { 543 switch (c = *pattern++) { 544 case '\0': 545 /* 546 * Ok we found an exact match 547 */ 548 if (*string == '\0') 549 return(0); 550 551 /* 552 * Check if it is a prefix match 553 */ 554 if ((dflag == 1) || (*string != '/')) 555 return(-1); 556 557 /* 558 * It is a prefix match, remember where the trailing 559 * / is located 560 */ 561 *pend = string; 562 return(0); 563 case '?': 564 if ((test = *string++) == '\0') 565 return (-1); 566 break; 567 case '*': 568 c = *pattern; 569 /* 570 * Collapse multiple *'s. 571 */ 572 while (c == '*') 573 c = *++pattern; 574 575 /* 576 * Optimized hack for pattern with a * at the end 577 */ 578 if (c == '\0') 579 return (0); 580 581 /* 582 * General case, use recursion. 583 */ 584 while ((test = *string) != '\0') { 585 if (!fn_match(pattern, string, pend)) 586 return (0); 587 ++string; 588 } 589 return (-1); 590 case '[': 591 /* 592 * range match 593 */ 594 if (((test = *string++) == '\0') || 595 ((pattern = range_match(pattern, test)) == NULL)) 596 return (-1); 597 break; 598 case '\\': 599 default: 600 if (c != *string++) 601 return (-1); 602 break; 603 } 604 } 605 /* NOTREACHED */ 606 } 607 608 #ifdef __STDC__ 609 static char * 610 range_match(register char *pattern, register int test) 611 #else 612 static char * 613 range_match(pattern, test) 614 register char *pattern; 615 register int test; 616 #endif 617 { 618 register char c; 619 register char c2; 620 int negate; 621 int ok = 0; 622 623 if ((negate = (*pattern == '!')) != 0) 624 ++pattern; 625 626 while ((c = *pattern++) != ']') { 627 /* 628 * Illegal pattern 629 */ 630 if (c == '\0') 631 return (NULL); 632 633 if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && 634 (c2 != ']')) { 635 if ((c <= test) && (test <= c2)) 636 ok = 1; 637 pattern += 2; 638 } else if (c == test) 639 ok = 1; 640 } 641 return (ok == negate ? NULL : pattern); 642 } 643 644 /* 645 * mod_name() 646 * modify a selected file name. first attempt to apply replacement string 647 * expressions, then apply interactive file rename. We apply replacement 648 * string expressions to both filenames and file links (if we didn't the 649 * links would point to the wrong place, and we could never be able to 650 * move an archive that has a file link in it). When we rename files 651 * interactively, we store that mapping (old name to user input name) so 652 * if we spot any file links to the old file name in the future, we will 653 * know exactly how to fix the file link. 654 * Return: 655 * 0 continue to process file, 1 skip this file, -1 pax is finished 656 */ 657 658 #ifdef __STDC__ 659 int 660 mod_name(register ARCHD *arcn) 661 #else 662 int 663 mod_name(arcn) 664 register ARCHD *arcn; 665 #endif 666 { 667 register int res = 0; 668 669 /* 670 * Strip off leading '/' if appropriate. 671 * Currently, this option is only set for the tar format. 672 */ 673 if (rmleadslash && arcn->name[0] == '/') { 674 if (arcn->name[1] == '\0') { 675 arcn->name[0] = '.'; 676 } else { 677 (void)memmove(arcn->name, &arcn->name[1], 678 strlen(arcn->name)); 679 arcn->nlen--; 680 } 681 if (rmleadslash < 2) { 682 rmleadslash = 2; 683 paxwarn(0, "Removing leading / from absolute path names in the archive"); 684 } 685 } 686 if (rmleadslash && arcn->ln_name[0] == '/' && 687 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { 688 if (arcn->ln_name[1] == '\0') { 689 arcn->ln_name[0] = '.'; 690 } else { 691 (void)memmove(arcn->ln_name, &arcn->ln_name[1], 692 strlen(arcn->ln_name)); 693 arcn->ln_nlen--; 694 } 695 if (rmleadslash < 2) { 696 rmleadslash = 2; 697 paxwarn(0, "Removing leading / from absolute path names in the archive"); 698 } 699 } 700 701 /* 702 * IMPORTANT: We have a problem. what do we do with symlinks? 703 * Modifying a hard link name makes sense, as we know the file it 704 * points at should have been seen already in the archive (and if it 705 * wasn't seen because of a read error or a bad archive, we lose 706 * anyway). But there are no such requirements for symlinks. On one 707 * hand the symlink that refers to a file in the archive will have to 708 * be modified to so it will still work at its new location in the 709 * file system. On the other hand a symlink that points elsewhere (and 710 * should continue to do so) should not be modified. There is clearly 711 * no perfect solution here. So we handle them like hardlinks. Clearly 712 * a replacement made by the interactive rename mapping is very likely 713 * to be correct since it applies to a single file and is an exact 714 * match. The regular expression replacements are a little harder to 715 * justify though. We claim that the symlink name is only likely 716 * to be replaced when it points within the file tree being moved and 717 * in that case it should be modified. what we really need to do is to 718 * call an oracle here. :) 719 */ 720 if (rephead != NULL) { 721 /* 722 * we have replacement strings, modify the name and the link 723 * name if any. 724 */ 725 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) 726 return(res); 727 728 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 729 (arcn->type == PAX_HRG)) && 730 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) 731 return(res); 732 } 733 734 if (iflag) { 735 /* 736 * perform interactive file rename, then map the link if any 737 */ 738 if ((res = tty_rename(arcn)) != 0) 739 return(res); 740 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 741 (arcn->type == PAX_HRG)) 742 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); 743 } 744 return(res); 745 } 746 747 /* 748 * tty_rename() 749 * Prompt the user for a replacement file name. A "." keeps the old name, 750 * a empty line skips the file, and an EOF on reading the tty, will cause 751 * pax to stop processing and exit. Otherwise the file name input, replaces 752 * the old one. 753 * Return: 754 * 0 process this file, 1 skip this file, -1 we need to exit pax 755 */ 756 757 #ifdef __STDC__ 758 static int 759 tty_rename(register ARCHD *arcn) 760 #else 761 static int 762 tty_rename(arcn) 763 register ARCHD *arcn; 764 #endif 765 { 766 char tmpname[PAXPATHLEN+2]; 767 int res; 768 769 /* 770 * prompt user for the replacement name for a file, keep trying until 771 * we get some reasonable input. Archives may have more than one file 772 * on them with the same name (from updates etc). We print verbose info 773 * on the file so the user knows what is up. 774 */ 775 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); 776 777 for (;;) { 778 ls_tty(arcn); 779 tty_prnt("Input new name, or a \".\" to keep the old name, "); 780 tty_prnt("or a \"return\" to skip this file.\n"); 781 tty_prnt("Input > "); 782 if (tty_read(tmpname, sizeof(tmpname)) < 0) 783 return(-1); 784 if (strcmp(tmpname, "..") == 0) { 785 tty_prnt("Try again, illegal file name: ..\n"); 786 continue; 787 } 788 if (strlen(tmpname) > PAXPATHLEN) { 789 tty_prnt("Try again, file name too long\n"); 790 continue; 791 } 792 break; 793 } 794 795 /* 796 * empty file name, skips this file. a "." leaves it alone 797 */ 798 if (tmpname[0] == '\0') { 799 tty_prnt("Skipping file.\n"); 800 return(1); 801 } 802 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { 803 tty_prnt("Processing continues, name unchanged.\n"); 804 return(0); 805 } 806 807 /* 808 * ok the name changed. We may run into links that point at this 809 * file later. we have to remember where the user sent the file 810 * in order to repair any links. 811 */ 812 tty_prnt("Processing continues, name changed to: %s\n", tmpname); 813 res = add_name(arcn->name, arcn->nlen, tmpname); 814 arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1); 815 arcn->name[arcn->nlen] = '\0'; 816 if (res < 0) 817 return(-1); 818 return(0); 819 } 820 821 /* 822 * set_dest() 823 * fix up the file name and the link name (if any) so this file will land 824 * in the destination directory (used during copy() -rw). 825 * Return: 826 * 0 if ok, -1 if failure (name too long) 827 */ 828 829 #ifdef __STDC__ 830 int 831 set_dest(register ARCHD *arcn, char *dest_dir, int dir_len) 832 #else 833 int 834 set_dest(arcn, dest_dir, dir_len) 835 register ARCHD *arcn; 836 char *dest_dir; 837 int dir_len; 838 #endif 839 { 840 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) 841 return(-1); 842 843 /* 844 * It is really hard to deal with symlinks here, we cannot be sure 845 * if the name they point was moved (or will be moved). It is best to 846 * leave them alone. 847 */ 848 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) 849 return(0); 850 851 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) 852 return(-1); 853 return(0); 854 } 855 856 /* 857 * fix_path 858 * concatenate dir_name and or_name and store the result in or_name (if 859 * it fits). This is one ugly function. 860 * Return: 861 * 0 if ok, -1 if the final name is too long 862 */ 863 864 #ifdef __STDC__ 865 static int 866 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) 867 #else 868 static int 869 fix_path(or_name, or_len, dir_name, dir_len) 870 char *or_name; 871 int *or_len; 872 char *dir_name; 873 int dir_len; 874 #endif 875 { 876 register char *src; 877 register char *dest; 878 register char *start; 879 int len; 880 881 /* 882 * we shift the or_name to the right enough to tack in the dir_name 883 * at the front. We make sure we have enough space for it all before 884 * we start. since dest always ends in a slash, we skip of or_name 885 * if it also starts with one. 886 */ 887 start = or_name; 888 src = start + *or_len; 889 dest = src + dir_len; 890 if (*start == '/') { 891 ++start; 892 --dest; 893 } 894 if ((len = dest - or_name) > PAXPATHLEN) { 895 paxwarn(1, "File name %s/%s, too long", dir_name, start); 896 return(-1); 897 } 898 *or_len = len; 899 900 /* 901 * enough space, shift 902 */ 903 while (src >= start) 904 *dest-- = *src--; 905 src = dir_name + dir_len - 1; 906 907 /* 908 * splice in the destination directory name 909 */ 910 while (src >= dir_name) 911 *dest-- = *src--; 912 913 *(or_name + len) = '\0'; 914 return(0); 915 } 916 917 /* 918 * rep_name() 919 * walk down the list of replacement strings applying each one in order. 920 * when we find one with a successful substitution, we modify the name 921 * as specified. if required, we print the results. if the resulting name 922 * is empty, we will skip this archive member. We use the regexp(3) 923 * routines (regexp() ought to win a prize as having the most cryptic 924 * library function manual page). 925 * --Parameters-- 926 * name is the file name we are going to apply the regular expressions to 927 * (and may be modified) 928 * nlen is the length of this name (and is modified to hold the length of 929 * the final string). 930 * prnt is a flag that says whether to print the final result. 931 * Return: 932 * 0 if substitution was successful, 1 if we are to skip the file (the name 933 * ended up empty) 934 */ 935 936 #ifdef __STDC__ 937 static int 938 rep_name(char *name, int *nlen, int prnt) 939 #else 940 static int 941 rep_name(name, nlen, prnt) 942 char *name; 943 int *nlen; 944 int prnt; 945 #endif 946 { 947 register REPLACE *pt; 948 register char *inpt; 949 register char *outpt; 950 register char *endpt; 951 register char *rpt; 952 register int found = 0; 953 register int res; 954 # ifndef NET2_REGEX 955 regmatch_t pm[MAXSUBEXP]; 956 # endif 957 char nname[PAXPATHLEN+1]; /* final result of all replacements */ 958 char buf1[PAXPATHLEN+1]; /* where we work on the name */ 959 960 /* 961 * copy the name into buf1, where we will work on it. We need to keep 962 * the orig string around so we can print out the result of the final 963 * replacement. We build up the final result in nname. inpt points at 964 * the string we apply the regular expression to. prnt is used to 965 * suppress printing when we handle replacements on the link field 966 * (the user already saw that substitution go by) 967 */ 968 pt = rephead; 969 (void)strcpy(buf1, name); 970 inpt = buf1; 971 outpt = nname; 972 endpt = outpt + PAXPATHLEN; 973 974 /* 975 * try each replacement string in order 976 */ 977 while (pt != NULL) { 978 do { 979 /* 980 * check for a successful substitution, if not go to 981 * the next pattern, or cleanup if we were global 982 */ 983 # ifdef NET2_REGEX 984 if (regexec(pt->rcmp, inpt) == 0) 985 # else 986 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) 987 # endif 988 break; 989 990 /* 991 * ok we found one. We have three parts, the prefix 992 * which did not match, the section that did and the 993 * tail (that also did not match). Copy the prefix to 994 * the final output buffer (watching to make sure we 995 * do not create a string too long). 996 */ 997 found = 1; 998 # ifdef NET2_REGEX 999 rpt = pt->rcmp->startp[0]; 1000 # else 1001 rpt = inpt + pm[0].rm_so; 1002 # endif 1003 1004 while ((inpt < rpt) && (outpt < endpt)) 1005 *outpt++ = *inpt++; 1006 if (outpt == endpt) 1007 break; 1008 1009 /* 1010 * for the second part (which matched the regular 1011 * expression) apply the substitution using the 1012 * replacement string and place it the prefix in the 1013 * final output. If we have problems, skip it. 1014 */ 1015 # ifdef NET2_REGEX 1016 if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) { 1017 # else 1018 if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) 1019 < 0) { 1020 # endif 1021 if (prnt) 1022 paxwarn(1, "Replacement name error %s", 1023 name); 1024 return(1); 1025 } 1026 outpt += res; 1027 1028 /* 1029 * we set up to look again starting at the first 1030 * character in the tail (of the input string right 1031 * after the last character matched by the regular 1032 * expression (inpt always points at the first char in 1033 * the string to process). If we are not doing a global 1034 * substitution, we will use inpt to copy the tail to 1035 * the final result. Make sure we do not overrun the 1036 * output buffer 1037 */ 1038 # ifdef NET2_REGEX 1039 inpt = pt->rcmp->endp[0]; 1040 # else 1041 inpt += pm[0].rm_eo - pm[0].rm_so; 1042 # endif 1043 1044 if ((outpt == endpt) || (*inpt == '\0')) 1045 break; 1046 1047 /* 1048 * if the user wants global we keep trying to 1049 * substitute until it fails, then we are done. 1050 */ 1051 } while (pt->flgs & GLOB); 1052 1053 if (found) 1054 break; 1055 1056 /* 1057 * a successful substitution did NOT occur, try the next one 1058 */ 1059 pt = pt->fow; 1060 } 1061 1062 if (found) { 1063 /* 1064 * we had a substitution, copy the last tail piece (if there is 1065 * room) to the final result 1066 */ 1067 while ((outpt < endpt) && (*inpt != '\0')) 1068 *outpt++ = *inpt++; 1069 1070 *outpt = '\0'; 1071 if ((outpt == endpt) && (*inpt != '\0')) { 1072 if (prnt) 1073 paxwarn(1,"Replacement name too long %s >> %s", 1074 name, nname); 1075 return(1); 1076 } 1077 1078 /* 1079 * inform the user of the result if wanted 1080 */ 1081 if (prnt && (pt->flgs & PRNT)) { 1082 if (*nname == '\0') 1083 (void)fprintf(stderr,"%s >> <empty string>\n", 1084 name); 1085 else 1086 (void)fprintf(stderr,"%s >> %s\n", name, nname); 1087 } 1088 1089 /* 1090 * if empty inform the caller this file is to be skipped 1091 * otherwise copy the new name over the orig name and return 1092 */ 1093 if (*nname == '\0') 1094 return(1); 1095 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); 1096 name[PAXPATHLEN] = '\0'; 1097 } 1098 return(0); 1099 } 1100 1101 #ifdef NET2_REGEX 1102 /* 1103 * resub() 1104 * apply the replacement to the matched expression. expand out the old 1105 * style ed(1) subexpression expansion. 1106 * Return: 1107 * -1 if error, or the number of characters added to the destination. 1108 */ 1109 1110 #ifdef __STDC__ 1111 static int 1112 resub(regexp *prog, char *src, char *dest, register char *destend) 1113 #else 1114 static int 1115 resub(prog, src, dest, destend) 1116 regexp *prog; 1117 char *src; 1118 char *dest; 1119 register char *destend; 1120 #endif 1121 { 1122 register char *spt; 1123 register char *dpt; 1124 register char c; 1125 register int no; 1126 register int len; 1127 1128 spt = src; 1129 dpt = dest; 1130 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1131 if (c == '&') 1132 no = 0; 1133 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) 1134 no = *spt++ - '0'; 1135 else { 1136 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1137 c = *spt++; 1138 *dpt++ = c; 1139 continue; 1140 } 1141 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || 1142 ((len = prog->endp[no] - prog->startp[no]) <= 0)) 1143 continue; 1144 1145 /* 1146 * copy the subexpression to the destination. 1147 * fail if we run out of space or the match string is damaged 1148 */ 1149 if (len > (destend - dpt)) 1150 len = destend - dpt; 1151 if (l_strncpy(dpt, prog->startp[no], len) != len) 1152 return(-1); 1153 dpt += len; 1154 } 1155 return(dpt - dest); 1156 } 1157 1158 #else 1159 1160 /* 1161 * resub() 1162 * apply the replacement to the matched expression. expand out the old 1163 * style ed(1) subexpression expansion. 1164 * Return: 1165 * -1 if error, or the number of characters added to the destination. 1166 */ 1167 1168 #ifdef __STDC__ 1169 static int 1170 resub(regex_t *rp, register regmatch_t *pm, char *src, char *dest, 1171 register char *destend) 1172 #else 1173 static int 1174 resub(rp, pm, src, dest, destend) 1175 regex_t *rp; 1176 register regmatch_t *pm; 1177 char *src; 1178 char *dest; 1179 register char *destend; 1180 #endif 1181 { 1182 register char *spt; 1183 register char *dpt; 1184 register char c; 1185 register regmatch_t *pmpt; 1186 register int len; 1187 int subexcnt; 1188 1189 spt = src; 1190 dpt = dest; 1191 subexcnt = rp->re_nsub; 1192 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1193 /* 1194 * see if we just have an ordinary replacement character 1195 * or we refer to a subexpression. 1196 */ 1197 if (c == '&') { 1198 pmpt = pm; 1199 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { 1200 /* 1201 * make sure there is a subexpression as specified 1202 */ 1203 if ((len = *spt++ - '0') > subexcnt) 1204 return(-1); 1205 pmpt = pm + len; 1206 } else { 1207 /* 1208 * Ordinary character, just copy it 1209 */ 1210 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1211 c = *spt++; 1212 *dpt++ = c; 1213 continue; 1214 } 1215 1216 /* 1217 * continue if the subexpression is bogus 1218 */ 1219 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || 1220 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) 1221 continue; 1222 1223 /* 1224 * copy the subexpression to the destination. 1225 * fail if we run out of space or the match string is damaged 1226 */ 1227 if (len > (destend - dpt)) 1228 len = destend - dpt; 1229 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) 1230 return(-1); 1231 dpt += len; 1232 } 1233 return(dpt - dest); 1234 } 1235 #endif 1236