xref: /freebsd/bin/pax/pat_rep.c (revision 417ed37975261df51f61d13e179ad04d8f4839c7)
1 /*-
2  * Copyright (c) 1992 Keith Muller.
3  * Copyright (c) 1992, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Keith Muller of the University of California, San Diego.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	$Id$
38  */
39 
40 #ifndef lint
41 static char sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
42 #endif /* not lint */
43 
44 #include <sys/types.h>
45 #include <sys/time.h>
46 #include <sys/stat.h>
47 #include <sys/param.h>
48 #include <stdio.h>
49 #include <ctype.h>
50 #include <string.h>
51 #include <unistd.h>
52 #include <stdlib.h>
53 #ifdef NET2_REGEX
54 #include <regexp.h>
55 #else
56 #include <regex.h>
57 #endif
58 #include "pax.h"
59 #include "pat_rep.h"
60 #include "extern.h"
61 
62 /*
63  * routines to handle pattern matching, name modification (regular expression
64  * substitution and interactive renames), and destination name modification for
65  * copy (-rw). Both file name and link names are adjusted as required in these
66  * routines.
67  */
68 
69 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
70 static PATTERN *pathead = NULL;		/* file pattern match list head */
71 static PATTERN *pattail = NULL;		/* file pattern match list tail */
72 static REPLACE *rephead = NULL;		/* replacement string list head */
73 static REPLACE *reptail = NULL;		/* replacement string list tail */
74 
75 static int rep_name __P((char *, int *, int));
76 static int tty_rename __P((register ARCHD *));
77 static int fix_path __P((char *, int *, char *, int));
78 static int fn_match __P((register char *, register char *, char **));
79 static char * range_match __P((register char *, register int));
80 #ifdef NET2_REGEX
81 static int resub __P((regexp *, char *, char *, register char *));
82 #else
83 static int resub __P((regex_t *, regmatch_t *, char *, char *, char *));
84 #endif
85 
86 /*
87  * rep_add()
88  *	parses the -s replacement string; compiles the regular expression
89  *	and stores the compiled value and it's replacement string together in
90  *	replacement string list. Input to this function is of the form:
91  *		/old/new/pg
92  *	The first char in the string specifies the delimiter used by this
93  *	replacement string. "Old" is a regular expression in "ed" format which
94  *	is compiled by regcomp() and is applied to filenames. "new" is the
95  *	substitution string; p and g are options flags for printing and global
96  *	replacement (over the single filename)
97  * Return:
98  *	0 if a proper replacement string and regular expression was added to
99  *	the list of replacement patterns; -1 otherwise.
100  */
101 
102 #if __STDC__
103 int
104 rep_add(register char *str)
105 #else
106 int
107 rep_add(str)
108 	register char *str;
109 #endif
110 {
111 	register char *pt1;
112 	register char *pt2;
113 	register REPLACE *rep;
114 #	ifndef NET2_REGEX
115 	register int res;
116 	char rebuf[BUFSIZ];
117 #	endif
118 
119 	/*
120 	 * throw out the bad parameters
121 	 */
122 	if ((str == NULL) || (*str == '\0')) {
123 		warn(1, "Empty replacement string");
124 		return(-1);
125 	}
126 
127 	/*
128 	 * first character in the string specifies what the delimiter is for
129 	 * this expression
130 	 */
131 	if ((pt1 = strchr(str+1, *str)) == NULL) {
132 		warn(1, "Invalid replacement string %s", str);
133 		return(-1);
134 	}
135 
136 	/*
137 	 * allocate space for the node that handles this replacement pattern
138 	 * and split out the regular expression and try to compile it
139 	 */
140 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
141 		warn(1, "Unable to allocate memory for replacement string");
142 		return(-1);
143 	}
144 
145 	*pt1 = '\0';
146 #	ifdef NET2_REGEX
147 	if ((rep->rcmp = regcomp(str+1)) == NULL) {
148 #	else
149 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
150 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
151 		warn(1, "%s while compiling regular expression %s", rebuf, str);
152 #	endif
153 		(void)free((char *)rep);
154 		return(-1);
155 	}
156 
157 	/*
158 	 * put the delimiter back in case we need an error message and
159 	 * locate the delimiter at the end of the replacement string
160 	 * we then point the node at the new substitution string
161 	 */
162 	*pt1++ = *str;
163 	if ((pt2 = strchr(pt1, *str)) == NULL) {
164 #		ifdef NET2_REGEX
165 		(void)free((char *)rep->rcmp);
166 #		else
167 		regfree(&(rep->rcmp));
168 #		endif
169 		(void)free((char *)rep);
170 		warn(1, "Invalid replacement string %s", str);
171 		return(-1);
172 	}
173 
174 	*pt2 = '\0';
175 	rep->nstr = pt1;
176 	pt1 = pt2++;
177 	rep->flgs = 0;
178 
179 	/*
180 	 * set the options if any
181 	 */
182 	while (*pt2 != '\0') {
183 		switch(*pt2) {
184 		case 'g':
185 		case 'G':
186 			rep->flgs  |= GLOB;
187 			break;
188 		case 'p':
189 		case 'P':
190 			rep->flgs  |= PRNT;
191 			break;
192 		default:
193 #			ifdef NET2_REGEX
194 			(void)free((char *)rep->rcmp);
195 #			else
196 			regfree(&(rep->rcmp));
197 #			endif
198 			(void)free((char *)rep);
199 			*pt1 = *str;
200 			warn(1, "Invalid replacement string option %s", str);
201 			return(-1);
202 		}
203 		++pt2;
204 	}
205 
206 	/*
207 	 * all done, link it in at the end
208 	 */
209 	rep->fow = NULL;
210 	if (rephead == NULL) {
211 		reptail = rephead = rep;
212 		return(0);
213 	}
214 	reptail->fow = rep;
215 	reptail = rep;
216 	return(0);
217 }
218 
219 /*
220  * pat_add()
221  *	add a pattern match to the pattern match list. Pattern matches are used
222  *	to select which archive members are extracted. (They appear as
223  *	arguments to pax in the list and read modes). If no patterns are
224  *	supplied to pax, all members in the archive will be selected (and the
225  *	pattern match list is empty).
226  * Return:
227  *	0 if the pattern was added to the list, -1 otherwise
228  */
229 
230 #if __STDC__
231 int
232 pat_add(char *str)
233 #else
234 int
235 pat_add(str)
236 	char *str;
237 #endif
238 {
239 	register PATTERN *pt;
240 
241 	/*
242 	 * throw out the junk
243 	 */
244 	if ((str == NULL) || (*str == '\0')) {
245 		warn(1, "Empty pattern string");
246 		return(-1);
247 	}
248 
249 	/*
250 	 * allocate space for the pattern and store the pattern. the pattern is
251 	 * part of argv so do not bother to copy it, just point at it. Add the
252 	 * node to the end of the pattern list
253 	 */
254 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
255 		warn(1, "Unable to allocate memory for pattern string");
256 		return(-1);
257 	}
258 
259 	pt->pstr = str;
260 	pt->pend = NULL;
261 	pt->plen = strlen(str);
262 	pt->fow = NULL;
263 	pt->flgs = 0;
264 	if (pathead == NULL) {
265 		pattail = pathead = pt;
266 		return(0);
267 	}
268 	pattail->fow = pt;
269 	pattail = pt;
270 	return(0);
271 }
272 
273 /*
274  * pat_chk()
275  *	complain if any the user supplied pattern did not result in a match to
276  *	a selected archive member.
277  */
278 
279 #if __STDC__
280 void
281 pat_chk(void)
282 #else
283 void
284 pat_chk()
285 #endif
286 {
287 	register PATTERN *pt;
288 	register int wban = 0;
289 
290 	/*
291 	 * walk down the list checking the flags to make sure MTCH was set,
292 	 * if not complain
293 	 */
294 	for (pt = pathead; pt != NULL; pt = pt->fow) {
295 		if (pt->flgs & MTCH)
296 			continue;
297 		if (!wban) {
298 			warn(1, "WARNING! These patterns were not matched:");
299 			++wban;
300 		}
301 		(void)fprintf(stderr, "%s\n", pt->pstr);
302 	}
303 }
304 
305 /*
306  * pat_sel()
307  *	the archive member which matches a pattern was selected. Mark the
308  *	pattern as having selected an archive member. arcn->pat points at the
309  *	pattern that was matched. arcn->pat is set in pat_match()
310  *
311  *	NOTE: When the -c option is used, we are called when there was no match
312  *	by pat_match() (that means we did match before the inverted sense of
313  *	the logic). Now this seems really strange at first, but with -c  we
314  *	need to keep track of those patterns that cause a archive member to NOT
315  *	be selected (it found an archive member with a specified pattern)
316  * Return:
317  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
318  *	match, -1 otherwise.
319  */
320 
321 #if __STDC__
322 int
323 pat_sel(register ARCHD *arcn)
324 #else
325 int
326 pat_sel(arcn)
327 	register ARCHD *arcn;
328 #endif
329 {
330 	register PATTERN *pt;
331 	register PATTERN **ppt;
332 	register int len;
333 
334 	/*
335 	 * if no patterns just return
336 	 */
337 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
338 		return(0);
339 
340 	/*
341 	 * when we are NOT limited to a single match per pattern mark the
342 	 * pattern and return
343 	 */
344 	if (!nflag) {
345 		pt->flgs |= MTCH;
346 		return(0);
347 	}
348 
349 	/*
350 	 * we reach this point only when we allow a single selected match per
351 	 * pattern, if the pattern matches a directory and we do not have -d
352 	 * (dflag) we are done with this pattern. We may also be handed a file
353 	 * in the subtree of a directory. in that case when we are operating
354 	 * with -d, this pattern was already selected and we are done
355 	 */
356 	if (pt->flgs & DIR_MTCH)
357 		return(0);
358 
359 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
360 		/*
361 		 * ok we matched a directory and we are allowing
362 		 * subtree matches but because of the -n only its children will
363 		 * match. This is tagged as a DIR_MTCH type.
364 		 * WATCH IT, the code assumes that pt->pend points
365 		 * into arcn->name and arcn->name has not been modified.
366 		 * If not we will have a big mess. Yup this is another kludge
367 		 */
368 
369 		/*
370 		 * if this was a prefix match, remove trailing part of path
371 		 * so we can copy it. Future matches will be exact prefix match
372 		 */
373 		if (pt->pend != NULL)
374 			*pt->pend = '\0';
375 
376 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
377 			warn(1, "Pattern select out of memory");
378 			if (pt->pend != NULL)
379 				*pt->pend = '/';
380 			pt->pend = NULL;
381 			return(-1);
382 		}
383 
384 		/*
385 		 * put the trailing / back in the source string
386 		 */
387 		if (pt->pend != NULL) {
388 			*pt->pend = '/';
389 			pt->pend = NULL;
390 		}
391 		pt->plen = strlen(pt->pstr);
392 
393 		/*
394 		 * strip off any trailing /, this should really never happen
395 		 */
396 		len = pt->plen - 1;
397 		if (*(pt->pstr + len) == '/') {
398 			*(pt->pstr + len) = '\0';
399 			pt->plen = len;
400 		}
401 		pt->flgs = DIR_MTCH | MTCH;
402 		arcn->pat = pt;
403 		return(0);
404 	}
405 
406 	/*
407 	 * we are then done with this pattern, so we delete it from the list
408 	 * because it can never be used for another match.
409 	 * Seems kind of strange to do for a -c, but the pax spec is really
410 	 * vague on the interaction of -c -n and -d. We assume that when -c
411 	 * and the pattern rejects a member (i.e. it matched it) it is done.
412 	 * In effect we place the order of the flags as having -c last.
413 	 */
414 	pt = pathead;
415 	ppt = &pathead;
416 	while ((pt != NULL) && (pt != arcn->pat)) {
417 		ppt = &(pt->fow);
418 		pt = pt->fow;
419 	}
420 
421 	if (pt == NULL) {
422 		/*
423 		 * should never happen....
424 		 */
425 		warn(1, "Pattern list inconsistant");
426 		return(-1);
427 	}
428 	*ppt = pt->fow;
429 	(void)free((char *)pt);
430 	arcn->pat = NULL;
431 	return(0);
432 }
433 
434 /*
435  * pat_match()
436  *	see if this archive member matches any supplied pattern, if a match
437  *	is found, arcn->pat is set to point at the potential pattern. Later if
438  *	this archive member is "selected" we process and mark the pattern as
439  *	one which matched a selected archive member (see pat_sel())
440  * Return:
441  *	0 if this archive member should be processed, 1 if it should be
442  *	skipped and -1 if we are done with all patterns (and pax should quit
443  *	looking for more members)
444  */
445 
446 #if __STDC__
447 int
448 pat_match(register ARCHD *arcn)
449 #else
450 int
451 pat_match(arcn)
452 	register ARCHD *arcn;
453 #endif
454 {
455 	register PATTERN *pt;
456 
457 	arcn->pat = NULL;
458 
459 	/*
460 	 * if there are no more patterns and we have -n (and not -c) we are
461 	 * done. otherwise with no patterns to match, matches all
462 	 */
463 	if (pathead == NULL) {
464 		if (nflag && !cflag)
465 			return(-1);
466 		return(0);
467 	}
468 
469 	/*
470 	 * have to search down the list one at a time looking for a match.
471 	 */
472 	pt = pathead;
473 	while (pt != NULL) {
474 		/*
475 		 * check for a file name match unless we have DIR_MTCH set in
476 		 * this pattern then we want a prefix match
477 		 */
478 		if (pt->flgs & DIR_MTCH) {
479 			/*
480 			 * this pattern was matched before to a directory
481 			 * as we must have -n set for this (but not -d). We can
482 			 * only match CHILDREN of that directory so we must use
483 			 * an exact prefix match (no wildcards).
484 			 */
485 			if ((arcn->name[pt->plen] == '/') &&
486 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
487 				break;
488 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
489 			break;
490 		pt = pt->fow;
491 	}
492 
493 	/*
494 	 * return the result, remember that cflag (-c) inverts the sense of a
495 	 * match
496 	 */
497 	if (pt == NULL)
498 		return(cflag ? 0 : 1);
499 
500 	/*
501 	 * we had a match, now when we invert the sense (-c) we reject this
502 	 * member. However we have to tag the pattern a being successful, (in a
503 	 * match, not in selecting a archive member) so we call pat_sel() here.
504 	 */
505 	arcn->pat = pt;
506 	if (!cflag)
507 		return(0);
508 
509 	if (pat_sel(arcn) < 0)
510 		return(-1);
511 	arcn->pat = NULL;
512 	return(1);
513 }
514 
515 /*
516  * fn_match()
517  * Return:
518  *	0 if this archive member should be processed, 1 if it should be
519  *	skipped and -1 if we are done with all patterns (and pax should quit
520  *	looking for more members)
521  *	Note: *pend may be changed to show where the prefix ends.
522  */
523 
524 #if __STDC__
525 static int
526 fn_match(register char *pattern, register char *string, char **pend)
527 #else
528 static int
529 fn_match(pattern, string, pend)
530 	register char *pattern;
531 	register char *string;
532 	char **pend;
533 #endif
534 {
535 	register char c;
536 	char test;
537 
538 	*pend = NULL;
539 	for (;;) {
540 		switch (c = *pattern++) {
541 		case '\0':
542 			/*
543 			 * Ok we found an exact match
544 			 */
545 			if (*string == '\0')
546 				return(0);
547 
548 			/*
549 			 * Check if it is a prefix match
550 			 */
551 			if ((dflag == 1) || (*string != '/'))
552 				return(-1);
553 
554 			/*
555 			 * It is a prefix match, remember where the trailing
556 			 * / is located
557 			 */
558 			*pend = string;
559 			return(0);
560 		case '?':
561 			if ((test = *string++) == '\0')
562 				return (-1);
563 			break;
564 		case '*':
565 			c = *pattern;
566 			/*
567 			 * Collapse multiple *'s.
568 			 */
569 			while (c == '*')
570 				c = *++pattern;
571 
572 			/*
573 			 * Optimized hack for pattern with a * at the end
574 			 */
575 			if (c == '\0')
576 				return (0);
577 
578 			/*
579 			 * General case, use recursion.
580 			 */
581 			while ((test = *string) != '\0') {
582 				if (!fn_match(pattern, string, pend))
583 					return (0);
584 				++string;
585 			}
586 			return (-1);
587 		case '[':
588 			/*
589 			 * range match
590 			 */
591 			if (((test = *string++) == '\0') ||
592 			    ((pattern = range_match(pattern, test)) == NULL))
593 				return (-1);
594 			break;
595 		case '\\':
596 		default:
597 			if (c != *string++)
598 				return (-1);
599 			break;
600 		}
601 	}
602 	/* NOTREACHED */
603 }
604 
/*
 * range_match()
 *	check the character test against a [...] set. pattern points just past
 *	the opening [. A leading ! inverts the sense of the set. Inside the set
 *	"a-z" stands for the inclusive range a through z; a - which is directly
 *	followed by the closing ] (or by the end of the pattern) is an ordinary
 *	character. The first ] seen always closes the set.
 * Return:
 *	a pointer to the pattern character after the closing ] if test is
 *	accepted by the set; NULL if it is not accepted or if the pattern ends
 *	before a closing ] is found.
 */

#ifdef __STDC__
static char *
range_match(register char *pattern, register int test)
#else
static char *
range_match(pattern, test)
	register char *pattern;
	register int test;
#endif
{
	register char lo;		/* set member, or low end of a range */
	register char hi;		/* high end of a range */
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern, it ended inside the set
		 */
		if (lo == '\0')
			return (NULL);

		/*
		 * a single character, unless a - and a usable upper bound
		 * come next
		 */
		if ((pattern[0] != '-') || (pattern[1] == '\0') ||
		    (pattern[1] == ']')) {
			if (lo == test)
				hit = 1;
			continue;
		}

		hi = pattern[1];
		pattern += 2;
		if ((test >= lo) && (test <= hi))
			hit = 1;
	}

	/*
	 * with ! a hit rejects, without it a miss rejects
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
640 
641 /*
642  * mod_name()
643  *	modify a selected file name. first attempt to apply replacement string
644  *	expressions, then apply interactive file rename. We apply replacement
645  *	string expressions to both filenames and file links (if we didn't the
646  *	links would point to the wrong place, and we could never be able to
647  *	move an archive that has a file link in it). When we rename files
648  *	interactively, we store that mapping (old name to user input name) so
649  *	if we spot any file links to the old file name in the future, we will
650  *	know exactly how to fix the file link.
651  * Return:
652  *	0 continue to  process file, 1 skip this file, -1 pax is finished
653  */
654 
655 #if __STDC__
656 int
657 mod_name(register ARCHD *arcn)
658 #else
659 int
660 mod_name(arcn)
661 	register ARCHD *arcn;
662 #endif
663 {
664 	register int res = 0;
665 
666 	/*
667 	 * IMPORTANT: We have a problem. what do we do with symlinks?
668 	 * Modifying a hard link name makes sense, as we know the file it
669 	 * points at should have been seen already in the archive (and if it
670 	 * wasn't seen because of a read error or a bad archive, we lose
671 	 * anyway). But there are no such requirements for symlinks. On one
672 	 * hand the symlink that refers to a file in the archive will have to
673 	 * be modified to so it will still work at its new location in the
674 	 * file system. On the other hand a symlink that points elsewhere (and
675 	 * should continue to do so) should not be modified. There is clearly
676 	 * no perfect solution here. So we handle them like hardlinks. Clearly
677 	 * a replacement made by the interactive rename mapping is very likely
678 	 * to be correct since it applies to a single file and is an exact
679 	 * match. The regular expression replacements are a little harder to
680 	 * justify though. We claim that the symlink name is only likely
681 	 * to be replaced when it points within the file tree being moved and
682 	 * in that case it should be modified. what we really need to do is to
683 	 * call an oracle here. :)
684 	 */
685 	if (rephead != NULL) {
686 		/*
687 		 * we have replacement strings, modify the name and the link
688 		 * name if any.
689 		 */
690 		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
691 			return(res);
692 
693 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
694 		    (arcn->type == PAX_HRG)) &&
695 		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
696 			return(res);
697 	}
698 
699 	if (iflag) {
700 		/*
701 		 * perform interactive file rename, then map the link if any
702 		 */
703 		if ((res = tty_rename(arcn)) != 0)
704 			return(res);
705 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
706 		    (arcn->type == PAX_HRG))
707 			sub_name(arcn->ln_name, &(arcn->ln_nlen));
708 	}
709 	return(res);
710 }
711 
712 /*
713  * tty_rename()
714  *	Prompt the user for a replacement file name. A "." keeps the old name,
715  *	a empty line skips the file, and an EOF on reading the tty, will cause
716  *	pax to stop processing and exit. Otherwise the file name input, replaces
717  *	the old one.
718  * Return:
719  *	0 process this file, 1 skip this file, -1 we need to exit pax
720  */
721 
722 #if __STDC__
723 static int
724 tty_rename(register ARCHD *arcn)
725 #else
726 static int
727 tty_rename(arcn)
728 	register ARCHD *arcn;
729 #endif
730 {
731 	char tmpname[PAXPATHLEN+2];
732 	int res;
733 
734 	/*
735 	 * prompt user for the replacement name for a file, keep trying until
736 	 * we get some reasonable input. Archives may have more than one file
737 	 * on them with the same name (from updates etc). We print verbose info
738 	 * on the file so the user knows what is up.
739 	 */
740 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
741 
742 	for (;;) {
743 		ls_tty(arcn);
744 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
745 		tty_prnt("or a \"return\" to skip this file.\n");
746 		tty_prnt("Input > ");
747 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
748 			return(-1);
749 		if (strcmp(tmpname, "..") == 0) {
750 			tty_prnt("Try again, illegal file name: ..\n");
751 			continue;
752 		}
753 		if (strlen(tmpname) > PAXPATHLEN) {
754 			tty_prnt("Try again, file name too long\n");
755 			continue;
756 		}
757 		break;
758 	}
759 
760 	/*
761 	 * empty file name, skips this file. a "." leaves it alone
762 	 */
763 	if (tmpname[0] == '\0') {
764 		tty_prnt("Skipping file.\n");
765 		return(1);
766 	}
767 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
768 		tty_prnt("Processing continues, name unchanged.\n");
769 		return(0);
770 	}
771 
772 	/*
773 	 * ok the name changed. We may run into links that point at this
774 	 * file later. we have to remember where the user sent the file
775 	 * in order to repair any links.
776 	 */
777 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
778 	res = add_name(arcn->name, arcn->nlen, tmpname);
779 	arcn->nlen = l_strncpy(arcn->name, tmpname, PAXPATHLEN+1);
780 	if (res < 0)
781 		return(-1);
782 	return(0);
783 }
784 
785 /*
786  * set_dest()
787  *	fix up the file name and the link name (if any) so this file will land
788  *	in the destination directory (used during copy() -rw).
789  * Return:
790  *	0 if ok, -1 if failure (name too long)
791  */
792 
793 #if __STDC__
794 int
795 set_dest(register ARCHD *arcn, char *dest_dir, int dir_len)
796 #else
797 int
798 set_dest(arcn, dest_dir, dir_len)
799 	register ARCHD *arcn;
800 	char *dest_dir;
801 	int dir_len;
802 #endif
803 {
804 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
805 		return(-1);
806 
807 	/*
808 	 * It is really hard to deal with symlinks here, we cannot be sure
809 	 * if the name they point was moved (or will be moved). It is best to
810 	 * leave them alone.
811 	 */
812 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
813 		return(0);
814 
815 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
816 		return(-1);
817 	return(0);
818 }
819 
820 /*
821  * fix_path
822  *	concatenate dir_name and or_name and store the result in or_name (if
823  *	it fits). This is one ugly function.
824  * Return:
825  *	0 if ok, -1 if the final name is too long
826  */
827 
828 #if __STDC__
829 static int
830 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
831 #else
832 static int
833 fix_path(or_name, or_len, dir_name, dir_len)
834 	char *or_name;
835 	int *or_len;
836 	char *dir_name;
837 	int dir_len;
838 #endif
839 {
840 	register char *src;
841 	register char *dest;
842 	register char *start;
843 	int len;
844 
845 	/*
846 	 * we shift the or_name to the right enough to tack in the dir_name
847 	 * at the front. We make sure we have enough space for it all before
848 	 * we start. since dest always ends in a slash, we skip of or_name
849 	 * if it also starts with one.
850 	 */
851 	start = or_name;
852 	src = start + *or_len;
853 	dest = src + dir_len;
854 	if (*start == '/') {
855 		++start;
856 		--dest;
857 	}
858 	if ((len = dest - or_name) > PAXPATHLEN) {
859 		warn(1, "File name %s/%s, too long", dir_name, start);
860 		return(-1);
861 	}
862 	*or_len = len;
863 
864 	/*
865 	 * enough space, shift
866 	 */
867 	while (src >= start)
868 		*dest-- = *src--;
869 	src = dir_name + dir_len - 1;
870 
871 	/*
872 	 * splice in the destination directory name
873 	 */
874 	while (src >= dir_name)
875 		*dest-- = *src--;
876 
877 	*(or_name + len) = '\0';
878 	return(0);
879 }
880 
881 /*
882  * rep_name()
883  *	walk down the list of replacement strings applying each one in order.
884  *	when we find one with a successful substitution, we modify the name
885  *	as specified. if required, we print the results. if the resulting name
886  *	is empty, we will skip this archive member. We use the regexp(3)
887  *	routines (regexp() ought to win a prize as having the most cryptic
888  *	library function manual page).
889  *	--Parameters--
890  *	name is the file name we are going to apply the regular expressions to
891  *	(and may be modified)
892  *	nlen is the length of this name (and is modified to hold the length of
893  *	the final string).
894  *	prnt is a flag that says whether to print the final result.
895  * Return:
896  *	0 if substitution was successful, 1 if we are to skip the file (the name
897  *	ended up empty)
898  */
899 
900 #if __STDC__
901 static int
902 rep_name(char *name, int *nlen, int prnt)
903 #else
904 static int
905 rep_name(name, nlen, prnt)
906 	char *name;
907 	int *nlen;
908 	int prnt;
909 #endif
910 {
911 	register REPLACE *pt;
912 	register char *inpt;
913 	register char *outpt;
914 	register char *endpt;
915 	register char *rpt;
916 	register int found = 0;
917 	register int res;
918 #	ifndef NET2_REGEX
919 	regmatch_t pm[MAXSUBEXP];
920 #	endif
921 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
922 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
923 
924 	/*
925 	 * copy the name into buf1, where we will work on it. We need to keep
926 	 * the orig string around so we can print out the result of the final
927 	 * replacement. We build up the final result in nname. inpt points at
928 	 * the string we apply the regular expression to. prnt is used to
929 	 * suppress printing when we handle replacements on the link field
930 	 * (the user already saw that substitution go by)
931 	 */
932 	pt = rephead;
933 	(void)strcpy(buf1, name);
934 	inpt = buf1;
935 	outpt = nname;
936 	endpt = outpt + PAXPATHLEN;
937 
938 	/*
939 	 * try each replacement string in order
940 	 */
941 	while (pt != NULL) {
942 		do {
943 			/*
944 			 * check for a successful substitution, if not go to
945 			 * the next pattern, or cleanup if we were global
946 			 */
947 #			ifdef NET2_REGEX
948 			if (regexec(pt->rcmp, inpt) == 0)
949 #			else
950 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
951 #			endif
952 				break;
953 
954 			/*
955 			 * ok we found one. We have three parts, the prefix
956 			 * which did not match, the section that did and the
957 			 * tail (that also did not match). Copy the prefix to
958 			 * the final output buffer (watching to make sure we
959 			 * do not create a string too long).
960 			 */
961 			found = 1;
962 #			ifdef NET2_REGEX
963 			rpt = pt->rcmp->startp[0];
964 #			else
965 			rpt = inpt + pm[0].rm_so;
966 #			endif
967 
968 			while ((inpt < rpt) && (outpt < endpt))
969 				*outpt++ = *inpt++;
970 			if (outpt == endpt)
971 				break;
972 
973 			/*
974 			 * for the second part (which matched the regular
975 			 * expression) apply the substitution using the
976 			 * replacement string and place it the prefix in the
977 			 * final output. If we have problems, skip it.
978 			 */
979 #			ifdef NET2_REGEX
980 			if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
981 #			else
982 			if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt))
983 			    < 0) {
984 #			endif
985 				if (prnt)
986 					warn(1, "Replacement name error %s",
987 					    name);
988 				return(1);
989 			}
990 			outpt += res;
991 
992 			/*
993 			 * we set up to look again starting at the first
994 			 * character in the tail (of the input string right
995 			 * after the last character matched by the regular
996 			 * expression (inpt always points at the first char in
997 			 * the string to process). If we are not doing a global
998 			 * substitution, we will use inpt to copy the tail to
999 			 * the final result. Make sure we do not overrun the
1000 			 * output buffer
1001 			 */
1002 #			ifdef NET2_REGEX
1003 			inpt = pt->rcmp->endp[0];
1004 #			else
1005 			inpt += pm[0].rm_eo;
1006 #			endif
1007 
1008 			if ((outpt == endpt) || (*inpt == '\0'))
1009 				break;
1010 
1011 			/*
1012 			 * if the user wants global we keep trying to
1013 			 * substitute until it fails, then we are done.
1014 			 */
1015 		} while (pt->flgs & GLOB);
1016 
1017 		if (found)
1018 			break;
1019 
1020 		/*
1021 		 * a successful substitution did NOT occur, try the next one
1022 		 */
1023 		pt = pt->fow;
1024 	}
1025 
1026 	if (found) {
1027 		/*
1028 		 * we had a substitution, copy the last tail piece (if there is
1029 		 * room) to the final result
1030 		 */
1031 		while ((outpt < endpt) && (*inpt != '\0'))
1032 			*outpt++ = *inpt++;
1033 
1034 		*outpt = '\0';
1035 		if ((outpt == endpt) && (*inpt != '\0')) {
1036 			if (prnt)
1037 				warn(1,"Replacement name too long %s >> %s",
1038 				    name, nname);
1039 			return(1);
1040 		}
1041 
1042 		/*
1043 		 * inform the user of the result if wanted
1044 		 */
1045 		if (prnt && (pt->flgs & PRNT)) {
1046 			if (*nname == '\0')
1047 				(void)fprintf(stderr,"%s >> <empty string>\n",
1048 				    name);
1049 			else
1050 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
1051 		}
1052 
1053 		/*
1054 		 * if empty inform the caller this file is to be skipped
1055 		 * otherwise copy the new name over the orig name and return
1056 		 */
1057 		if (*nname == '\0')
1058 			return(1);
1059 		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1060 	}
1061 	return(0);
1062 }
1063 
1064 #ifdef NET2_REGEX
1065 /*
1066  * resub()
1067  *	apply the replacement to the matched expression. expand out the old
1068  * 	style ed(1) subexpression expansion.
1069  * Return:
1070  *	-1 if error, or the number of characters added to the destination.
1071  */
1072 
1073 #if __STDC__
1074 static int
1075 resub(regexp *prog, char *src, char *dest, register char *destend)
1076 #else
1077 static int
1078 resub(prog, src, dest, destend)
1079 	regexp *prog;
1080 	char *src;
1081 	char *dest;
1082 	register char *destend;
1083 #endif
1084 {
1085 	register char *spt;
1086 	register char *dpt;
1087 	register char c;
1088 	register int no;
1089 	register int len;
1090 
1091 	spt = src;
1092 	dpt = dest;
1093 	while ((dpt < destend) && ((c = *spt++) != '\0')) {
1094 		if (c == '&')
1095 			no = 0;
1096 		else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1097 			no = *spt++ - '0';
1098 		else {
1099  			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1100  				c = *spt++;
1101  			*dpt++ = c;
1102 			continue;
1103 		}
1104  		if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1105 		    ((len = prog->endp[no] - prog->startp[no]) <= 0))
1106 			continue;
1107 
1108 		/*
1109 		 * copy the subexpression to the destination.
1110 		 * fail if we run out of space or the match string is damaged
1111 		 */
1112 		if (len > (destend - dpt))
1113 			len = destend - dpt;
1114 		if (l_strncpy(dpt, prog->startp[no], len) != len)
1115 			return(-1);
1116 		dpt += len;
1117 	}
1118 	return(dpt - dest);
1119 }
1120 
1121 #else
1122 
/*
 * resub()
 *	build the replacement for a matched expression (POSIX regex version).
 *	The replacement string src is copied to dest with the old style ed(1)
 *	expansions applied: & stands for the whole match (pm[0]), \0 through
 *	\9 for the subexpression with that number, and \\ and \& give a plain
 *	\ and &. A subexpression which is not set or is empty adds nothing.
 *	Copying stops quietly when destend is reached.
 *
 *	NOTE(review): the text for & and \N is read from src + rm_so, that is
 *	from the replacement string itself. The offsets in pm were filled in
 *	by regexec() for the string rep_name() matched against, and that
 *	string is not passed to this routine. This looks unintended; to fix
 *	it the matched string has to become a parameter (prototype change).
 * Return:
 *	-1 if error (a \N with N larger than the number of subexpressions in
 *	rp, or a short copy), or the number of characters added to the
 *	destination.
 */

#if __STDC__
static int
resub(regex_t *rp, register regmatch_t *pm, char *src, char *dest,
	register char *destend)
#else
static int
resub(rp, pm, src, dest, destend)
	regex_t *rp;
	register regmatch_t *pm;
	char *src;
	char *dest;
	register char *destend;
#endif
{
	register char *from = src;	/* walks the replacement string */
	register char *to = dest;	/* next free spot in the output */
	register char ch;
	register regmatch_t *sub;	/* (sub)expression referred to */
	register int cnt;
	int nsub;

	nsub = rp->re_nsub;
	for (;;) {
		if (to >= destend)
			break;
		ch = *from++;
		if (ch == '\0')
			break;

		/*
		 * sort out whether this is an ordinary replacement character
		 * or a reference to a (sub)expression
		 */
		if (ch == '&') {
			sub = pm;
		} else if (ch != '\\') {
			*to++ = ch;
			continue;
		} else if ((*from >= '0') && (*from <= '9')) {
			/*
			 * the expression must have that many subexpressions
			 */
			cnt = *from++ - '0';
			if (cnt > nsub)
				return(-1);
			sub = pm + cnt;
		} else {
			/*
			 * a backslash in front of a \ or & only quotes it,
			 * any other backslash is copied as it is
			 */
			if ((*from == '\\') || (*from == '&'))
				ch = *from++;
			*to++ = ch;
			continue;
		}

		/*
		 * nothing to add for a subexpression that is not set or
		 * that is empty
		 */
		if ((sub->rm_so < 0) || (sub->rm_eo < 0))
			continue;
		cnt = sub->rm_eo - sub->rm_so;
		if (cnt <= 0)
			continue;

		/*
		 * copy the subexpression to the destination (as much of it
		 * as there is room for), fail if the copy comes up short
		 */
		if (cnt > (destend - to))
			cnt = destend - to;
		if (l_strncpy(to, src + sub->rm_so, cnt) != cnt)
			return(-1);
		to += cnt;
	}
	return(to - dest);
}
1198 #endif
1199