xref: /freebsd/bin/pax/pat_rep.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 /*-
2  * Copyright (c) 1992 Keith Muller.
3  * Copyright (c) 1992, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Keith Muller of the University of California, San Diego.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 #if 0
36 static char sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
37 #endif
38 #endif /* not lint */
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <errno.h>
49 #ifdef NET2_REGEX
50 #include <regexp.h>
51 #else
52 #include <regex.h>
53 #endif
54 #include "pax.h"
55 #include "pat_rep.h"
56 #include "extern.h"
57 
58 /*
59  * routines to handle pattern matching, name modification (regular expression
60  * substitution and interactive renames), and destination name modification for
61  * copy (-rw). Both file name and link names are adjusted as required in these
62  * routines.
63  */
64 
65 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
66 static PATTERN *pathead = NULL;		/* file pattern match list head */
67 static PATTERN *pattail = NULL;		/* file pattern match list tail */
68 static REPLACE *rephead = NULL;		/* replacement string list head */
69 static REPLACE *reptail = NULL;		/* replacement string list tail */
70 
71 static int rep_name(char *, int *, int);
72 static int tty_rename(ARCHD *);
73 static int fix_path(char *, int *, char *, int);
74 static int fn_match(char *, char *, char **);
75 static char * range_match(char *, int);
76 #ifdef NET2_REGEX
77 static int resub(regexp *, char *, char *, char *);
78 #else
79 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
80 #endif
81 
82 /*
83  * rep_add()
84  *	parses the -s replacement string; compiles the regular expression
85  *	and stores the compiled value and it's replacement string together in
86  *	replacement string list. Input to this function is of the form:
87  *		/old/new/pg
88  *	The first char in the string specifies the delimiter used by this
89  *	replacement string. "Old" is a regular expression in "ed" format which
90  *	is compiled by regcomp() and is applied to filenames. "new" is the
91  *	substitution string; p and g are options flags for printing and global
92  *	replacement (over the single filename)
93  * Return:
94  *	0 if a proper replacement string and regular expression was added to
95  *	the list of replacement patterns; -1 otherwise.
96  */
97 
98 int
99 rep_add(char *str)
100 {
101 	char *pt1;
102 	char *pt2;
103 	REPLACE *rep;
104 #	ifndef NET2_REGEX
105 	int res;
106 	char rebuf[BUFSIZ];
107 #	endif
108 
109 	/*
110 	 * throw out the bad parameters
111 	 */
112 	if ((str == NULL) || (*str == '\0')) {
113 		paxwarn(1, "Empty replacement string");
114 		return(-1);
115 	}
116 
117 	/*
118 	 * first character in the string specifies what the delimiter is for
119 	 * this expression
120 	 */
121 	if ((pt1 = strchr(str+1, *str)) == NULL) {
122 		paxwarn(1, "Invalid replacement string %s", str);
123 		return(-1);
124 	}
125 
126 	/*
127 	 * allocate space for the node that handles this replacement pattern
128 	 * and split out the regular expression and try to compile it
129 	 */
130 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
131 		paxwarn(1, "Unable to allocate memory for replacement string");
132 		return(-1);
133 	}
134 
135 	*pt1 = '\0';
136 #	ifdef NET2_REGEX
137 	if ((rep->rcmp = regcomp(str+1)) == NULL) {
138 #	else
139 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
140 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
141 		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
142 #	endif
143 		free(rep);
144 		return(-1);
145 	}
146 
147 	/*
148 	 * put the delimiter back in case we need an error message and
149 	 * locate the delimiter at the end of the replacement string
150 	 * we then point the node at the new substitution string
151 	 */
152 	*pt1++ = *str;
153 	if ((pt2 = strchr(pt1, *str)) == NULL) {
154 #		ifdef NET2_REGEX
155 		free(rep->rcmp);
156 #		else
157 		regfree(&rep->rcmp);
158 #		endif
159 		free(rep);
160 		paxwarn(1, "Invalid replacement string %s", str);
161 		return(-1);
162 	}
163 
164 	*pt2 = '\0';
165 	rep->nstr = pt1;
166 	pt1 = pt2++;
167 	rep->flgs = 0;
168 
169 	/*
170 	 * set the options if any
171 	 */
172 	while (*pt2 != '\0') {
173 		switch(*pt2) {
174 		case 'g':
175 		case 'G':
176 			rep->flgs  |= GLOB;
177 			break;
178 		case 'p':
179 		case 'P':
180 			rep->flgs  |= PRNT;
181 			break;
182 		default:
183 #			ifdef NET2_REGEX
184 			free(rep->rcmp);
185 #			else
186 			regfree(&rep->rcmp);
187 #			endif
188 			free(rep);
189 			*pt1 = *str;
190 			paxwarn(1, "Invalid replacement string option %s", str);
191 			return(-1);
192 		}
193 		++pt2;
194 	}
195 
196 	/*
197 	 * all done, link it in at the end
198 	 */
199 	rep->fow = NULL;
200 	if (rephead == NULL) {
201 		reptail = rephead = rep;
202 		return(0);
203 	}
204 	reptail->fow = rep;
205 	reptail = rep;
206 	return(0);
207 }
208 
209 /*
210  * pat_add()
211  *	add a pattern match to the pattern match list. Pattern matches are used
212  *	to select which archive members are extracted. (They appear as
213  *	arguments to pax in the list and read modes). If no patterns are
214  *	supplied to pax, all members in the archive will be selected (and the
215  *	pattern match list is empty).
216  * Return:
217  *	0 if the pattern was added to the list, -1 otherwise
218  */
219 
220 int
221 pat_add(char *str, char *chdnam)
222 {
223 	PATTERN *pt;
224 
225 	/*
226 	 * throw out the junk
227 	 */
228 	if ((str == NULL) || (*str == '\0')) {
229 		paxwarn(1, "Empty pattern string");
230 		return(-1);
231 	}
232 
233 	/*
234 	 * allocate space for the pattern and store the pattern. the pattern is
235 	 * part of argv so do not bother to copy it, just point at it. Add the
236 	 * node to the end of the pattern list
237 	 */
238 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
239 		paxwarn(1, "Unable to allocate memory for pattern string");
240 		return(-1);
241 	}
242 
243 	pt->pstr = str;
244 	pt->pend = NULL;
245 	pt->plen = strlen(str);
246 	pt->fow = NULL;
247 	pt->flgs = 0;
248 	pt->chdname = chdnam;
249 
250 	if (pathead == NULL) {
251 		pattail = pathead = pt;
252 		return(0);
253 	}
254 	pattail->fow = pt;
255 	pattail = pt;
256 	return(0);
257 }
258 
259 /*
260  * pat_chk()
261  *	complain if any the user supplied pattern did not result in a match to
262  *	a selected archive member.
263  */
264 
265 void
266 pat_chk(void)
267 {
268 	PATTERN *pt;
269 	int wban = 0;
270 
271 	/*
272 	 * walk down the list checking the flags to make sure MTCH was set,
273 	 * if not complain
274 	 */
275 	for (pt = pathead; pt != NULL; pt = pt->fow) {
276 		if (pt->flgs & MTCH)
277 			continue;
278 		if (!wban) {
279 			paxwarn(1, "WARNING! These patterns were not matched:");
280 			++wban;
281 		}
282 		(void)fprintf(stderr, "%s\n", pt->pstr);
283 	}
284 }
285 
286 /*
287  * pat_sel()
288  *	the archive member which matches a pattern was selected. Mark the
289  *	pattern as having selected an archive member. arcn->pat points at the
290  *	pattern that was matched. arcn->pat is set in pat_match()
291  *
292  *	NOTE: When the -c option is used, we are called when there was no match
293  *	by pat_match() (that means we did match before the inverted sense of
294  *	the logic). Now this seems really strange at first, but with -c  we
295  *	need to keep track of those patterns that cause an archive member to NOT
296  *	be selected (it found an archive member with a specified pattern)
297  * Return:
298  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
299  *	match, -1 otherwise.
300  */
301 
302 int
303 pat_sel(ARCHD *arcn)
304 {
305 	PATTERN *pt;
306 	PATTERN **ppt;
307 	int len;
308 
309 	/*
310 	 * if no patterns just return
311 	 */
312 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
313 		return(0);
314 
315 	/*
316 	 * when we are NOT limited to a single match per pattern mark the
317 	 * pattern and return
318 	 */
319 	if (!nflag) {
320 		pt->flgs |= MTCH;
321 		return(0);
322 	}
323 
324 	/*
325 	 * we reach this point only when we allow a single selected match per
326 	 * pattern, if the pattern matches a directory and we do not have -d
327 	 * (dflag) we are done with this pattern. We may also be handed a file
328 	 * in the subtree of a directory. in that case when we are operating
329 	 * with -d, this pattern was already selected and we are done
330 	 */
331 	if (pt->flgs & DIR_MTCH)
332 		return(0);
333 
334 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
335 		/*
336 		 * ok we matched a directory and we are allowing
337 		 * subtree matches but because of the -n only its children will
338 		 * match. This is tagged as a DIR_MTCH type.
339 		 * WATCH IT, the code assumes that pt->pend points
340 		 * into arcn->name and arcn->name has not been modified.
341 		 * If not we will have a big mess. Yup this is another kludge
342 		 */
343 
344 		/*
345 		 * if this was a prefix match, remove trailing part of path
346 		 * so we can copy it. Future matches will be exact prefix match
347 		 */
348 		if (pt->pend != NULL)
349 			*pt->pend = '\0';
350 
351 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
352 			paxwarn(1, "Pattern select out of memory");
353 			if (pt->pend != NULL)
354 				*pt->pend = '/';
355 			pt->pend = NULL;
356 			return(-1);
357 		}
358 
359 		/*
360 		 * put the trailing / back in the source string
361 		 */
362 		if (pt->pend != NULL) {
363 			*pt->pend = '/';
364 			pt->pend = NULL;
365 		}
366 		pt->plen = strlen(pt->pstr);
367 
368 		/*
369 		 * strip off any trailing /, this should really never happen
370 		 */
371 		len = pt->plen - 1;
372 		if (*(pt->pstr + len) == '/') {
373 			*(pt->pstr + len) = '\0';
374 			pt->plen = len;
375 		}
376 		pt->flgs = DIR_MTCH | MTCH;
377 		arcn->pat = pt;
378 		return(0);
379 	}
380 
381 	/*
382 	 * we are then done with this pattern, so we delete it from the list
383 	 * because it can never be used for another match.
384 	 * Seems kind of strange to do for a -c, but the pax spec is really
385 	 * vague on the interaction of -c -n and -d. We assume that when -c
386 	 * and the pattern rejects a member (i.e. it matched it) it is done.
387 	 * In effect we place the order of the flags as having -c last.
388 	 */
389 	pt = pathead;
390 	ppt = &pathead;
391 	while ((pt != NULL) && (pt != arcn->pat)) {
392 		ppt = &(pt->fow);
393 		pt = pt->fow;
394 	}
395 
396 	if (pt == NULL) {
397 		/*
398 		 * should never happen....
399 		 */
400 		paxwarn(1, "Pattern list inconsistant");
401 		return(-1);
402 	}
403 	*ppt = pt->fow;
404 	free(pt);
405 	arcn->pat = NULL;
406 	return(0);
407 }
408 
409 /*
410  * pat_match()
411  *	see if this archive member matches any supplied pattern, if a match
412  *	is found, arcn->pat is set to point at the potential pattern. Later if
413  *	this archive member is "selected" we process and mark the pattern as
414  *	one which matched a selected archive member (see pat_sel())
415  * Return:
416  *	0 if this archive member should be processed, 1 if it should be
417  *	skipped and -1 if we are done with all patterns (and pax should quit
418  *	looking for more members)
419  */
420 
421 int
422 pat_match(ARCHD *arcn)
423 {
424 	PATTERN *pt;
425 
426 	arcn->pat = NULL;
427 
428 	/*
429 	 * if there are no more patterns and we have -n (and not -c) we are
430 	 * done. otherwise with no patterns to match, matches all
431 	 */
432 	if (pathead == NULL) {
433 		if (nflag && !cflag)
434 			return(-1);
435 		return(0);
436 	}
437 
438 	/*
439 	 * have to search down the list one at a time looking for a match.
440 	 */
441 	pt = pathead;
442 	while (pt != NULL) {
443 		/*
444 		 * check for a file name match unless we have DIR_MTCH set in
445 		 * this pattern then we want a prefix match
446 		 */
447 		if (pt->flgs & DIR_MTCH) {
448 			/*
449 			 * this pattern was matched before to a directory
450 			 * as we must have -n set for this (but not -d). We can
451 			 * only match CHILDREN of that directory so we must use
452 			 * an exact prefix match (no wildcards).
453 			 */
454 			if ((arcn->name[pt->plen] == '/') &&
455 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
456 				break;
457 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
458 			break;
459 		pt = pt->fow;
460 	}
461 
462 	/*
463 	 * return the result, remember that cflag (-c) inverts the sense of a
464 	 * match
465 	 */
466 	if (pt == NULL)
467 		return(cflag ? 0 : 1);
468 
469 	/*
470 	 * We had a match, now when we invert the sense (-c) we reject this
471 	 * member. However we have to tag the pattern a being successful, (in a
472 	 * match, not in selecting an archive member) so we call pat_sel() here.
473 	 */
474 	arcn->pat = pt;
475 	if (!cflag)
476 		return(0);
477 
478 	if (pat_sel(arcn) < 0)
479 		return(-1);
480 	arcn->pat = NULL;
481 	return(1);
482 }
483 
484 /*
485  * fn_match()
486  * Return:
487  *	0 if this archive member should be processed, 1 if it should be
488  *	skipped and -1 if we are done with all patterns (and pax should quit
489  *	looking for more members)
490  *	Note: *pend may be changed to show where the prefix ends.
491  */
492 
493 static int
494 fn_match(char *pattern, char *string, char **pend)
495 {
496 	char c;
497 	char test;
498 
499 	*pend = NULL;
500 	for (;;) {
501 		switch (c = *pattern++) {
502 		case '\0':
503 			/*
504 			 * Ok we found an exact match
505 			 */
506 			if (*string == '\0')
507 				return(0);
508 
509 			/*
510 			 * Check if it is a prefix match
511 			 */
512 			if ((dflag == 1) || (*string != '/'))
513 				return(-1);
514 
515 			/*
516 			 * It is a prefix match, remember where the trailing
517 			 * / is located
518 			 */
519 			*pend = string;
520 			return(0);
521 		case '?':
522 			if ((test = *string++) == '\0')
523 				return (-1);
524 			break;
525 		case '*':
526 			c = *pattern;
527 			/*
528 			 * Collapse multiple *'s.
529 			 */
530 			while (c == '*')
531 				c = *++pattern;
532 
533 			/*
534 			 * Optimized hack for pattern with a * at the end
535 			 */
536 			if (c == '\0')
537 				return (0);
538 
539 			/*
540 			 * General case, use recursion.
541 			 */
542 			while ((test = *string) != '\0') {
543 				if (!fn_match(pattern, string, pend))
544 					return (0);
545 				++string;
546 			}
547 			return (-1);
548 		case '[':
549 			/*
550 			 * range match
551 			 */
552 			if (((test = *string++) == '\0') ||
553 			    ((pattern = range_match(pattern, test)) == NULL))
554 				return (-1);
555 			break;
556 		case '\\':
557 		default:
558 			if (c != *string++)
559 				return (-1);
560 			break;
561 		}
562 	}
563 	/* NOTREACHED */
564 }
565 
/*
 * range_match()
 *	Match one character against the body of a [...] range expression.
 *	pattern points just past the opening '['. A leading '!' negates the
 *	range. "a-b" matches any character from a through b; a '-' that is
 *	followed by ']' or by the end of the pattern is an ordinary character.
 * Return:
 *	pointer to the pattern character after the closing ']' if test is
 *	accepted by the range; NULL if it is rejected or if the pattern ends
 *	before a ']' is found.
 */
static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end of the pattern: illegal range
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				/*
				 * lo-hi range, step over the '-' and hi
				 */
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}

		if (lo == test)
			hit = 1;
	}

	/*
	 * a negated range accepts exactly what a plain one rejects
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
594 
595 /*
596  * mod_name()
597  *	modify a selected file name. first attempt to apply replacement string
598  *	expressions, then apply interactive file rename. We apply replacement
599  *	string expressions to both filenames and file links (if we didn't the
600  *	links would point to the wrong place, and we could never be able to
601  *	move an archive that has a file link in it). When we rename files
602  *	interactively, we store that mapping (old name to user input name) so
603  *	if we spot any file links to the old file name in the future, we will
604  *	know exactly how to fix the file link.
605  * Return:
606  *	0 continue to  process file, 1 skip this file, -1 pax is finished
607  */
608 
609 int
610 mod_name(ARCHD *arcn)
611 {
612 	int res = 0;
613 
614 	/*
615 	 * Strip off leading '/' if appropriate.
616 	 * Currently, this option is only set for the tar format.
617 	 */
618 	if (rmleadslash && arcn->name[0] == '/') {
619 		if (arcn->name[1] == '\0') {
620 			arcn->name[0] = '.';
621 		} else {
622 			(void)memmove(arcn->name, &arcn->name[1],
623 			    strlen(arcn->name));
624 			arcn->nlen--;
625 		}
626 		if (rmleadslash < 2) {
627 			rmleadslash = 2;
628 			paxwarn(0, "Removing leading / from absolute path names in the archive");
629 		}
630 	}
631 	if (rmleadslash && arcn->ln_name[0] == '/' &&
632 	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
633 		if (arcn->ln_name[1] == '\0') {
634 			arcn->ln_name[0] = '.';
635 		} else {
636 			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
637 			    strlen(arcn->ln_name));
638 			arcn->ln_nlen--;
639 		}
640 		if (rmleadslash < 2) {
641 			rmleadslash = 2;
642 			paxwarn(0, "Removing leading / from absolute path names in the archive");
643 		}
644 	}
645 
646 	/*
647 	 * IMPORTANT: We have a problem. what do we do with symlinks?
648 	 * Modifying a hard link name makes sense, as we know the file it
649 	 * points at should have been seen already in the archive (and if it
650 	 * wasn't seen because of a read error or a bad archive, we lose
651 	 * anyway). But there are no such requirements for symlinks. On one
652 	 * hand the symlink that refers to a file in the archive will have to
653 	 * be modified to so it will still work at its new location in the
654 	 * file system. On the other hand a symlink that points elsewhere (and
655 	 * should continue to do so) should not be modified. There is clearly
656 	 * no perfect solution here. So we handle them like hardlinks. Clearly
657 	 * a replacement made by the interactive rename mapping is very likely
658 	 * to be correct since it applies to a single file and is an exact
659 	 * match. The regular expression replacements are a little harder to
660 	 * justify though. We claim that the symlink name is only likely
661 	 * to be replaced when it points within the file tree being moved and
662 	 * in that case it should be modified. what we really need to do is to
663 	 * call an oracle here. :)
664 	 */
665 	if (rephead != NULL) {
666 		/*
667 		 * we have replacement strings, modify the name and the link
668 		 * name if any.
669 		 */
670 		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
671 			return(res);
672 
673 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
674 		    (arcn->type == PAX_HRG)) &&
675 		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
676 			return(res);
677 	}
678 
679 	if (iflag) {
680 		/*
681 		 * perform interactive file rename, then map the link if any
682 		 */
683 		if ((res = tty_rename(arcn)) != 0)
684 			return(res);
685 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
686 		    (arcn->type == PAX_HRG))
687 			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
688 	}
689 	return(res);
690 }
691 
692 /*
693  * tty_rename()
694  *	Prompt the user for a replacement file name. A "." keeps the old name,
695  *	a empty line skips the file, and an EOF on reading the tty, will cause
696  *	pax to stop processing and exit. Otherwise the file name input, replaces
697  *	the old one.
698  * Return:
699  *	0 process this file, 1 skip this file, -1 we need to exit pax
700  */
701 
702 static int
703 tty_rename(ARCHD *arcn)
704 {
705 	char tmpname[PAXPATHLEN+2];
706 	int res;
707 
708 	/*
709 	 * prompt user for the replacement name for a file, keep trying until
710 	 * we get some reasonable input. Archives may have more than one file
711 	 * on them with the same name (from updates etc). We print verbose info
712 	 * on the file so the user knows what is up.
713 	 */
714 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
715 
716 	for (;;) {
717 		ls_tty(arcn);
718 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
719 		tty_prnt("or a \"return\" to skip this file.\n");
720 		tty_prnt("Input > ");
721 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
722 			return(-1);
723 		if (strcmp(tmpname, "..") == 0) {
724 			tty_prnt("Try again, illegal file name: ..\n");
725 			continue;
726 		}
727 		if (strlen(tmpname) > PAXPATHLEN) {
728 			tty_prnt("Try again, file name too long\n");
729 			continue;
730 		}
731 		break;
732 	}
733 
734 	/*
735 	 * empty file name, skips this file. a "." leaves it alone
736 	 */
737 	if (tmpname[0] == '\0') {
738 		tty_prnt("Skipping file.\n");
739 		return(1);
740 	}
741 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
742 		tty_prnt("Processing continues, name unchanged.\n");
743 		return(0);
744 	}
745 
746 	/*
747 	 * ok the name changed. We may run into links that point at this
748 	 * file later. we have to remember where the user sent the file
749 	 * in order to repair any links.
750 	 */
751 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
752 	res = add_name(arcn->name, arcn->nlen, tmpname);
753 	arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
754 	arcn->name[arcn->nlen] = '\0';
755 	if (res < 0)
756 		return(-1);
757 	return(0);
758 }
759 
760 /*
761  * set_dest()
762  *	fix up the file name and the link name (if any) so this file will land
763  *	in the destination directory (used during copy() -rw).
764  * Return:
765  *	0 if ok, -1 if failure (name too long)
766  */
767 
768 int
769 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
770 {
771 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
772 		return(-1);
773 
774 	/*
775 	 * It is really hard to deal with symlinks here, we cannot be sure
776 	 * if the name they point was moved (or will be moved). It is best to
777 	 * leave them alone.
778 	 */
779 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
780 		return(0);
781 
782 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
783 		return(-1);
784 	return(0);
785 }
786 
787 /*
788  * fix_path
789  *	concatenate dir_name and or_name and store the result in or_name (if
790  *	it fits). This is one ugly function.
791  * Return:
792  *	0 if ok, -1 if the final name is too long
793  */
794 
795 static int
796 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
797 {
798 	char *src;
799 	char *dest;
800 	char *start;
801 	int len;
802 
803 	/*
804 	 * we shift the or_name to the right enough to tack in the dir_name
805 	 * at the front. We make sure we have enough space for it all before
806 	 * we start. since dest always ends in a slash, we skip of or_name
807 	 * if it also starts with one.
808 	 */
809 	start = or_name;
810 	src = start + *or_len;
811 	dest = src + dir_len;
812 	if (*start == '/') {
813 		++start;
814 		--dest;
815 	}
816 	if ((len = dest - or_name) > PAXPATHLEN) {
817 		paxwarn(1, "File name %s/%s, too long", dir_name, start);
818 		return(-1);
819 	}
820 	*or_len = len;
821 
822 	/*
823 	 * enough space, shift
824 	 */
825 	while (src >= start)
826 		*dest-- = *src--;
827 	src = dir_name + dir_len - 1;
828 
829 	/*
830 	 * splice in the destination directory name
831 	 */
832 	while (src >= dir_name)
833 		*dest-- = *src--;
834 
835 	*(or_name + len) = '\0';
836 	return(0);
837 }
838 
839 /*
840  * rep_name()
841  *	walk down the list of replacement strings applying each one in order.
842  *	when we find one with a successful substitution, we modify the name
843  *	as specified. if required, we print the results. if the resulting name
844  *	is empty, we will skip this archive member. We use the regexp(3)
845  *	routines (regexp() ought to win a prize as having the most cryptic
846  *	library function manual page).
847  *	--Parameters--
848  *	name is the file name we are going to apply the regular expressions to
849  *	(and may be modified)
850  *	nlen is the length of this name (and is modified to hold the length of
851  *	the final string).
852  *	prnt is a flag that says whether to print the final result.
853  * Return:
854  *	0 if substitution was successful, 1 if we are to skip the file (the name
855  *	ended up empty)
856  */
857 
858 static int
859 rep_name(char *name, int *nlen, int prnt)
860 {
861 	REPLACE *pt;
862 	char *inpt;
863 	char *outpt;
864 	char *endpt;
865 	char *rpt;
866 	int found = 0;
867 	int res;
868 #	ifndef NET2_REGEX
869 	regmatch_t pm[MAXSUBEXP];
870 #	endif
871 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
872 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
873 
874 	/*
875 	 * copy the name into buf1, where we will work on it. We need to keep
876 	 * the orig string around so we can print out the result of the final
877 	 * replacement. We build up the final result in nname. inpt points at
878 	 * the string we apply the regular expression to. prnt is used to
879 	 * suppress printing when we handle replacements on the link field
880 	 * (the user already saw that substitution go by)
881 	 */
882 	pt = rephead;
883 	(void)strcpy(buf1, name);
884 	inpt = buf1;
885 	outpt = nname;
886 	endpt = outpt + PAXPATHLEN;
887 
888 	/*
889 	 * try each replacement string in order
890 	 */
891 	while (pt != NULL) {
892 		do {
893 			/*
894 			 * check for a successful substitution, if not go to
895 			 * the next pattern, or cleanup if we were global
896 			 */
897 #			ifdef NET2_REGEX
898 			if (regexec(pt->rcmp, inpt) == 0)
899 #			else
900 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
901 #			endif
902 				break;
903 
904 			/*
905 			 * ok we found one. We have three parts, the prefix
906 			 * which did not match, the section that did and the
907 			 * tail (that also did not match). Copy the prefix to
908 			 * the final output buffer (watching to make sure we
909 			 * do not create a string too long).
910 			 */
911 			found = 1;
912 #			ifdef NET2_REGEX
913 			rpt = pt->rcmp->startp[0];
914 #			else
915 			rpt = inpt + pm[0].rm_so;
916 #			endif
917 
918 			while ((inpt < rpt) && (outpt < endpt))
919 				*outpt++ = *inpt++;
920 			if (outpt == endpt)
921 				break;
922 
923 			/*
924 			 * for the second part (which matched the regular
925 			 * expression) apply the substitution using the
926 			 * replacement string and place it the prefix in the
927 			 * final output. If we have problems, skip it.
928 			 */
929 #			ifdef NET2_REGEX
930 			if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
931 #			else
932 			if ((res = resub(&(pt->rcmp),pm,inpt,pt->nstr,outpt,endpt))
933 			    < 0) {
934 #			endif
935 				if (prnt)
936 					paxwarn(1, "Replacement name error %s",
937 					    name);
938 				return(1);
939 			}
940 			outpt += res;
941 
942 			/*
943 			 * we set up to look again starting at the first
944 			 * character in the tail (of the input string right
945 			 * after the last character matched by the regular
946 			 * expression (inpt always points at the first char in
947 			 * the string to process). If we are not doing a global
948 			 * substitution, we will use inpt to copy the tail to
949 			 * the final result. Make sure we do not overrun the
950 			 * output buffer
951 			 */
952 #			ifdef NET2_REGEX
953 			inpt = pt->rcmp->endp[0];
954 #			else
955 			inpt += pm[0].rm_eo - pm[0].rm_so;
956 #			endif
957 
958 			if ((outpt == endpt) || (*inpt == '\0'))
959 				break;
960 
961 			/*
962 			 * if the user wants global we keep trying to
963 			 * substitute until it fails, then we are done.
964 			 */
965 		} while (pt->flgs & GLOB);
966 
967 		if (found)
968 			break;
969 
970 		/*
971 		 * a successful substitution did NOT occur, try the next one
972 		 */
973 		pt = pt->fow;
974 	}
975 
976 	if (found) {
977 		/*
978 		 * we had a substitution, copy the last tail piece (if there is
979 		 * room) to the final result
980 		 */
981 		while ((outpt < endpt) && (*inpt != '\0'))
982 			*outpt++ = *inpt++;
983 
984 		*outpt = '\0';
985 		if ((outpt == endpt) && (*inpt != '\0')) {
986 			if (prnt)
987 				paxwarn(1,"Replacement name too long %s >> %s",
988 				    name, nname);
989 			return(1);
990 		}
991 
992 		/*
993 		 * inform the user of the result if wanted
994 		 */
995 		if (prnt && (pt->flgs & PRNT)) {
996 			if (*nname == '\0')
997 				(void)fprintf(stderr,"%s >> <empty string>\n",
998 				    name);
999 			else
1000 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
1001 		}
1002 
1003 		/*
1004 		 * if empty inform the caller this file is to be skipped
1005 		 * otherwise copy the new name over the orig name and return
1006 		 */
1007 		if (*nname == '\0')
1008 			return(1);
1009 		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1010 		name[PAXPATHLEN] = '\0';
1011 	}
1012 	return(0);
1013 }
1014 
1015 #ifdef NET2_REGEX
1016 /*
1017  * resub()
1018  *	apply the replacement to the matched expression. expand out the old
1019  * 	style ed(1) subexpression expansion.
1020  * Return:
1021  *	-1 if error, or the number of characters added to the destination.
1022  */
1023 
1024 static int
1025 resub(regexp *prog, char *src, char *dest, char *destend)
1026 {
1027 	char *spt;
1028 	char *dpt;
1029 	char c;
1030 	int no;
1031 	int len;
1032 
1033 	spt = src;
1034 	dpt = dest;
1035 	while ((dpt < destend) && ((c = *spt++) != '\0')) {
1036 		if (c == '&')
1037 			no = 0;
1038 		else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1039 			no = *spt++ - '0';
1040 		else {
1041  			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1042  				c = *spt++;
1043  			*dpt++ = c;
1044 			continue;
1045 		}
1046  		if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1047 		    ((len = prog->endp[no] - prog->startp[no]) <= 0))
1048 			continue;
1049 
1050 		/*
1051 		 * copy the subexpression to the destination.
1052 		 * fail if we run out of space or the match string is damaged
1053 		 */
1054 		if (len > (destend - dpt))
1055 			len = destend - dpt;
1056 		if (l_strncpy(dpt, prog->startp[no], len) != len)
1057 			return(-1);
1058 		dpt += len;
1059 	}
1060 	return(dpt - dest);
1061 }
1062 
1063 #else
1064 
/*
 * resub()
 *	Expand the replacement string src into dest using the match offsets
 *	in pm. '&' stands for the whole match (pm[0]) and \0 through \9 for
 *	the corresponding entry of pm (old style ed(1) expansion); "\\" and
 *	"\&" produce a literal backslash and ampersand. The offsets in pm
 *	are applied to orig. Expansion stops when dest reaches destend. A
 *	subexpression with a negative offset, or one that matched nothing,
 *	expands to nothing.
 * Return:
 *	-1 if error (a \N with N larger than the number of subexpressions in
 *	rp, or a damaged match string), or the number of characters added to
 *	the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *orig, char *src, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	regmatch_t *mt;
	char ch;
	int idx;
	int span;
	int nsub;

	nsub = rp->re_nsub;
	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (ch == '&') {
			mt = pm;
		} else if ((ch == '\\') && (*in >= '0') && (*in <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			idx = *in++ - '0';
			if (idx > nsub)
				return(-1);
			mt = pm + idx;
		} else {
			/*
			 * Ordinary character, just copy it
			 */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * continue if the subexpression is bogus
		 */
		if ((mt->rm_so < 0) || (mt->rm_eo < 0))
			continue;
		span = mt->rm_eo - mt->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the subexpression to the destination, clipped to the
		 * space that is left. fail if the match string is damaged
		 */
		if (span > (destend - out))
			span = destend - out;
		if (l_strncpy(out, orig + mt->rm_so, span) != span)
			return(-1);
		out += span;
	}
	return(out - dest);
}
1130 #endif
1131