xref: /freebsd/bin/pax/pat_rep.c (revision 361e428888e630eb708c72cf31579a25ba5d4f03)
1 /*-
2  * Copyright (c) 1992 Keith Muller.
3  * Copyright (c) 1992, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Keith Muller of the University of California, San Diego.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 #if 0
36 static char sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
37 #endif
38 #endif /* not lint */
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #ifdef NET2_REGEX
48 #include <regexp.h>
49 #else
50 #include <regex.h>
51 #endif
52 #include "pax.h"
53 #include "pat_rep.h"
54 #include "extern.h"
55 
56 /*
57  * routines to handle pattern matching, name modification (regular expression
58  * substitution and interactive renames), and destination name modification for
59  * copy (-rw). Both file name and link names are adjusted as required in these
60  * routines.
61  */
62 
63 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
64 static PATTERN *pathead = NULL;		/* file pattern match list head */
65 static PATTERN *pattail = NULL;		/* file pattern match list tail */
66 static REPLACE *rephead = NULL;		/* replacement string list head */
67 static REPLACE *reptail = NULL;		/* replacement string list tail */
68 
69 static int rep_name(char *, int *, int);
70 static int tty_rename(ARCHD *);
71 static int fix_path(char *, int *, char *, int);
72 static int fn_match(char *, char *, char **);
73 static char * range_match(char *, int);
74 #ifdef NET2_REGEX
75 static int resub(regexp *, char *, char *, char *);
76 #else
77 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
78 #endif
79 
80 /*
81  * rep_add()
82  *	parses the -s replacement string; compiles the regular expression
83  *	and stores the compiled value and it's replacement string together in
84  *	replacement string list. Input to this function is of the form:
85  *		/old/new/pg
86  *	The first char in the string specifies the delimiter used by this
87  *	replacement string. "Old" is a regular expression in "ed" format which
88  *	is compiled by regcomp() and is applied to filenames. "new" is the
89  *	substitution string; p and g are options flags for printing and global
90  *	replacement (over the single filename)
91  * Return:
92  *	0 if a proper replacement string and regular expression was added to
93  *	the list of replacement patterns; -1 otherwise.
94  */
95 
96 int
97 rep_add(char *str)
98 {
99 	char *pt1;
100 	char *pt2;
101 	REPLACE *rep;
102 #	ifndef NET2_REGEX
103 	int res;
104 	char rebuf[BUFSIZ];
105 #	endif
106 
107 	/*
108 	 * throw out the bad parameters
109 	 */
110 	if ((str == NULL) || (*str == '\0')) {
111 		paxwarn(1, "Empty replacement string");
112 		return(-1);
113 	}
114 
115 	/*
116 	 * first character in the string specifies what the delimiter is for
117 	 * this expression
118 	 */
119 	if ((pt1 = strchr(str+1, *str)) == NULL) {
120 		paxwarn(1, "Invalid replacement string %s", str);
121 		return(-1);
122 	}
123 
124 	/*
125 	 * allocate space for the node that handles this replacement pattern
126 	 * and split out the regular expression and try to compile it
127 	 */
128 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
129 		paxwarn(1, "Unable to allocate memory for replacement string");
130 		return(-1);
131 	}
132 
133 	*pt1 = '\0';
134 #	ifdef NET2_REGEX
135 	if ((rep->rcmp = regcomp(str+1)) == NULL) {
136 #	else
137 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
138 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
139 		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
140 #	endif
141 		free(rep);
142 		return(-1);
143 	}
144 
145 	/*
146 	 * put the delimiter back in case we need an error message and
147 	 * locate the delimiter at the end of the replacement string
148 	 * we then point the node at the new substitution string
149 	 */
150 	*pt1++ = *str;
151 	if ((pt2 = strchr(pt1, *str)) == NULL) {
152 #		ifdef NET2_REGEX
153 		free(rep->rcmp);
154 #		else
155 		regfree(&rep->rcmp);
156 #		endif
157 		free(rep);
158 		paxwarn(1, "Invalid replacement string %s", str);
159 		return(-1);
160 	}
161 
162 	*pt2 = '\0';
163 	rep->nstr = pt1;
164 	pt1 = pt2++;
165 	rep->flgs = 0;
166 
167 	/*
168 	 * set the options if any
169 	 */
170 	while (*pt2 != '\0') {
171 		switch(*pt2) {
172 		case 'g':
173 		case 'G':
174 			rep->flgs  |= GLOB;
175 			break;
176 		case 'p':
177 		case 'P':
178 			rep->flgs  |= PRNT;
179 			break;
180 		default:
181 #			ifdef NET2_REGEX
182 			free(rep->rcmp);
183 #			else
184 			regfree(&rep->rcmp);
185 #			endif
186 			free(rep);
187 			*pt1 = *str;
188 			paxwarn(1, "Invalid replacement string option %s", str);
189 			return(-1);
190 		}
191 		++pt2;
192 	}
193 
194 	/*
195 	 * all done, link it in at the end
196 	 */
197 	rep->fow = NULL;
198 	if (rephead == NULL) {
199 		reptail = rephead = rep;
200 		return(0);
201 	}
202 	reptail->fow = rep;
203 	reptail = rep;
204 	return(0);
205 }
206 
207 /*
208  * pat_add()
209  *	add a pattern match to the pattern match list. Pattern matches are used
210  *	to select which archive members are extracted. (They appear as
211  *	arguments to pax in the list and read modes). If no patterns are
212  *	supplied to pax, all members in the archive will be selected (and the
213  *	pattern match list is empty).
214  * Return:
215  *	0 if the pattern was added to the list, -1 otherwise
216  */
217 
218 int
219 pat_add(char *str, char *chdnam)
220 {
221 	PATTERN *pt;
222 
223 	/*
224 	 * throw out the junk
225 	 */
226 	if ((str == NULL) || (*str == '\0')) {
227 		paxwarn(1, "Empty pattern string");
228 		return(-1);
229 	}
230 
231 	/*
232 	 * allocate space for the pattern and store the pattern. the pattern is
233 	 * part of argv so do not bother to copy it, just point at it. Add the
234 	 * node to the end of the pattern list
235 	 */
236 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
237 		paxwarn(1, "Unable to allocate memory for pattern string");
238 		return(-1);
239 	}
240 
241 	pt->pstr = str;
242 	pt->pend = NULL;
243 	pt->plen = strlen(str);
244 	pt->fow = NULL;
245 	pt->flgs = 0;
246 	pt->chdname = chdnam;
247 
248 	if (pathead == NULL) {
249 		pattail = pathead = pt;
250 		return(0);
251 	}
252 	pattail->fow = pt;
253 	pattail = pt;
254 	return(0);
255 }
256 
257 /*
258  * pat_chk()
259  *	complain if any the user supplied pattern did not result in a match to
260  *	a selected archive member.
261  */
262 
263 void
264 pat_chk(void)
265 {
266 	PATTERN *pt;
267 	int wban = 0;
268 
269 	/*
270 	 * walk down the list checking the flags to make sure MTCH was set,
271 	 * if not complain
272 	 */
273 	for (pt = pathead; pt != NULL; pt = pt->fow) {
274 		if (pt->flgs & MTCH)
275 			continue;
276 		if (!wban) {
277 			paxwarn(1, "WARNING! These patterns were not matched:");
278 			++wban;
279 		}
280 		(void)fprintf(stderr, "%s\n", pt->pstr);
281 	}
282 }
283 
284 /*
285  * pat_sel()
286  *	the archive member which matches a pattern was selected. Mark the
287  *	pattern as having selected an archive member. arcn->pat points at the
288  *	pattern that was matched. arcn->pat is set in pat_match()
289  *
290  *	NOTE: When the -c option is used, we are called when there was no match
291  *	by pat_match() (that means we did match before the inverted sense of
292  *	the logic). Now this seems really strange at first, but with -c  we
293  *	need to keep track of those patterns that cause an archive member to NOT
294  *	be selected (it found an archive member with a specified pattern)
295  * Return:
296  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
297  *	match, -1 otherwise.
298  */
299 
300 int
301 pat_sel(ARCHD *arcn)
302 {
303 	PATTERN *pt;
304 	PATTERN **ppt;
305 	int len;
306 
307 	/*
308 	 * if no patterns just return
309 	 */
310 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
311 		return(0);
312 
313 	/*
314 	 * when we are NOT limited to a single match per pattern mark the
315 	 * pattern and return
316 	 */
317 	if (!nflag) {
318 		pt->flgs |= MTCH;
319 		return(0);
320 	}
321 
322 	/*
323 	 * we reach this point only when we allow a single selected match per
324 	 * pattern, if the pattern matches a directory and we do not have -d
325 	 * (dflag) we are done with this pattern. We may also be handed a file
326 	 * in the subtree of a directory. in that case when we are operating
327 	 * with -d, this pattern was already selected and we are done
328 	 */
329 	if (pt->flgs & DIR_MTCH)
330 		return(0);
331 
332 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
333 		/*
334 		 * ok we matched a directory and we are allowing
335 		 * subtree matches but because of the -n only its children will
336 		 * match. This is tagged as a DIR_MTCH type.
337 		 * WATCH IT, the code assumes that pt->pend points
338 		 * into arcn->name and arcn->name has not been modified.
339 		 * If not we will have a big mess. Yup this is another kludge
340 		 */
341 
342 		/*
343 		 * if this was a prefix match, remove trailing part of path
344 		 * so we can copy it. Future matches will be exact prefix match
345 		 */
346 		if (pt->pend != NULL)
347 			*pt->pend = '\0';
348 
349 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
350 			paxwarn(1, "Pattern select out of memory");
351 			if (pt->pend != NULL)
352 				*pt->pend = '/';
353 			pt->pend = NULL;
354 			return(-1);
355 		}
356 
357 		/*
358 		 * put the trailing / back in the source string
359 		 */
360 		if (pt->pend != NULL) {
361 			*pt->pend = '/';
362 			pt->pend = NULL;
363 		}
364 		pt->plen = strlen(pt->pstr);
365 
366 		/*
367 		 * strip off any trailing /, this should really never happen
368 		 */
369 		len = pt->plen - 1;
370 		if (*(pt->pstr + len) == '/') {
371 			*(pt->pstr + len) = '\0';
372 			pt->plen = len;
373 		}
374 		pt->flgs = DIR_MTCH | MTCH;
375 		arcn->pat = pt;
376 		return(0);
377 	}
378 
379 	/*
380 	 * we are then done with this pattern, so we delete it from the list
381 	 * because it can never be used for another match.
382 	 * Seems kind of strange to do for a -c, but the pax spec is really
383 	 * vague on the interaction of -c -n and -d. We assume that when -c
384 	 * and the pattern rejects a member (i.e. it matched it) it is done.
385 	 * In effect we place the order of the flags as having -c last.
386 	 */
387 	pt = pathead;
388 	ppt = &pathead;
389 	while ((pt != NULL) && (pt != arcn->pat)) {
390 		ppt = &(pt->fow);
391 		pt = pt->fow;
392 	}
393 
394 	if (pt == NULL) {
395 		/*
396 		 * should never happen....
397 		 */
398 		paxwarn(1, "Pattern list inconsistent");
399 		return(-1);
400 	}
401 	*ppt = pt->fow;
402 	free(pt);
403 	arcn->pat = NULL;
404 	return(0);
405 }
406 
407 /*
408  * pat_match()
409  *	see if this archive member matches any supplied pattern, if a match
410  *	is found, arcn->pat is set to point at the potential pattern. Later if
411  *	this archive member is "selected" we process and mark the pattern as
412  *	one which matched a selected archive member (see pat_sel())
413  * Return:
414  *	0 if this archive member should be processed, 1 if it should be
415  *	skipped and -1 if we are done with all patterns (and pax should quit
416  *	looking for more members)
417  */
418 
419 int
420 pat_match(ARCHD *arcn)
421 {
422 	PATTERN *pt;
423 
424 	arcn->pat = NULL;
425 
426 	/*
427 	 * if there are no more patterns and we have -n (and not -c) we are
428 	 * done. otherwise with no patterns to match, matches all
429 	 */
430 	if (pathead == NULL) {
431 		if (nflag && !cflag)
432 			return(-1);
433 		return(0);
434 	}
435 
436 	/*
437 	 * have to search down the list one at a time looking for a match.
438 	 */
439 	pt = pathead;
440 	while (pt != NULL) {
441 		/*
442 		 * check for a file name match unless we have DIR_MTCH set in
443 		 * this pattern then we want a prefix match
444 		 */
445 		if (pt->flgs & DIR_MTCH) {
446 			/*
447 			 * this pattern was matched before to a directory
448 			 * as we must have -n set for this (but not -d). We can
449 			 * only match CHILDREN of that directory so we must use
450 			 * an exact prefix match (no wildcards).
451 			 */
452 			if ((arcn->name[pt->plen] == '/') &&
453 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
454 				break;
455 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
456 			break;
457 		pt = pt->fow;
458 	}
459 
460 	/*
461 	 * return the result, remember that cflag (-c) inverts the sense of a
462 	 * match
463 	 */
464 	if (pt == NULL)
465 		return(cflag ? 0 : 1);
466 
467 	/*
468 	 * We had a match, now when we invert the sense (-c) we reject this
469 	 * member. However we have to tag the pattern a being successful, (in a
470 	 * match, not in selecting an archive member) so we call pat_sel() here.
471 	 */
472 	arcn->pat = pt;
473 	if (!cflag)
474 		return(0);
475 
476 	if (pat_sel(arcn) < 0)
477 		return(-1);
478 	arcn->pat = NULL;
479 	return(1);
480 }
481 
482 /*
483  * fn_match()
484  * Return:
485  *	0 if this archive member should be processed, 1 if it should be
486  *	skipped and -1 if we are done with all patterns (and pax should quit
487  *	looking for more members)
488  *	Note: *pend may be changed to show where the prefix ends.
489  */
490 
491 static int
492 fn_match(char *pattern, char *string, char **pend)
493 {
494 	char c;
495 	char test;
496 
497 	*pend = NULL;
498 	for (;;) {
499 		switch (c = *pattern++) {
500 		case '\0':
501 			/*
502 			 * Ok we found an exact match
503 			 */
504 			if (*string == '\0')
505 				return(0);
506 
507 			/*
508 			 * Check if it is a prefix match
509 			 */
510 			if ((dflag == 1) || (*string != '/'))
511 				return(-1);
512 
513 			/*
514 			 * It is a prefix match, remember where the trailing
515 			 * / is located
516 			 */
517 			*pend = string;
518 			return(0);
519 		case '?':
520 			if ((test = *string++) == '\0')
521 				return (-1);
522 			break;
523 		case '*':
524 			c = *pattern;
525 			/*
526 			 * Collapse multiple *'s.
527 			 */
528 			while (c == '*')
529 				c = *++pattern;
530 
531 			/*
532 			 * Optimized hack for pattern with a * at the end
533 			 */
534 			if (c == '\0')
535 				return (0);
536 
537 			/*
538 			 * General case, use recursion.
539 			 */
540 			while ((test = *string) != '\0') {
541 				if (!fn_match(pattern, string, pend))
542 					return (0);
543 				++string;
544 			}
545 			return (-1);
546 		case '[':
547 			/*
548 			 * range match
549 			 */
550 			if (((test = *string++) == '\0') ||
551 			    ((pattern = range_match(pattern, test)) == NULL))
552 				return (-1);
553 			break;
554 		case '\\':
555 		default:
556 			if (c != *string++)
557 				return (-1);
558 			break;
559 		}
560 	}
561 	/* NOTREACHED */
562 }
563 
/*
 * range_match()
 *	match the character test against a bracket expression. pattern
 *	points just past the opening '['. A leading '!' negates the
 *	expression; "a-z" style ranges are supported. A '-' that is followed
 *	by ']' or by the end of the pattern is an ordinary character.
 * Return:
 *	pointer to the character following the closing ']' if test is
 *	accepted, NULL if it is rejected or the expression has no closing ']'
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing ], illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				/* a real lo-hi range, consume "-hi" */
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}
		if (lo == test)
			hit = 1;
	}
	return ((hit == invert) ? NULL : pattern);
}
592 
593 /*
594  * mod_name()
595  *	modify a selected file name. first attempt to apply replacement string
596  *	expressions, then apply interactive file rename. We apply replacement
597  *	string expressions to both filenames and file links (if we didn't the
598  *	links would point to the wrong place, and we could never be able to
599  *	move an archive that has a file link in it). When we rename files
600  *	interactively, we store that mapping (old name to user input name) so
601  *	if we spot any file links to the old file name in the future, we will
602  *	know exactly how to fix the file link.
603  * Return:
604  *	0 continue to  process file, 1 skip this file, -1 pax is finished
605  */
606 
607 int
608 mod_name(ARCHD *arcn)
609 {
610 	int res = 0;
611 
612 	/*
613 	 * Strip off leading '/' if appropriate.
614 	 * Currently, this option is only set for the tar format.
615 	 */
616 	if (rmleadslash && arcn->name[0] == '/') {
617 		if (arcn->name[1] == '\0') {
618 			arcn->name[0] = '.';
619 		} else {
620 			(void)memmove(arcn->name, &arcn->name[1],
621 			    strlen(arcn->name));
622 			arcn->nlen--;
623 		}
624 		if (rmleadslash < 2) {
625 			rmleadslash = 2;
626 			paxwarn(0, "Removing leading / from absolute path names in the archive");
627 		}
628 	}
629 	if (rmleadslash && arcn->ln_name[0] == '/' &&
630 	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
631 		if (arcn->ln_name[1] == '\0') {
632 			arcn->ln_name[0] = '.';
633 		} else {
634 			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
635 			    strlen(arcn->ln_name));
636 			arcn->ln_nlen--;
637 		}
638 		if (rmleadslash < 2) {
639 			rmleadslash = 2;
640 			paxwarn(0, "Removing leading / from absolute path names in the archive");
641 		}
642 	}
643 
644 	/*
645 	 * IMPORTANT: We have a problem. what do we do with symlinks?
646 	 * Modifying a hard link name makes sense, as we know the file it
647 	 * points at should have been seen already in the archive (and if it
648 	 * wasn't seen because of a read error or a bad archive, we lose
649 	 * anyway). But there are no such requirements for symlinks. On one
650 	 * hand the symlink that refers to a file in the archive will have to
651 	 * be modified to so it will still work at its new location in the
652 	 * file system. On the other hand a symlink that points elsewhere (and
653 	 * should continue to do so) should not be modified. There is clearly
654 	 * no perfect solution here. So we handle them like hardlinks. Clearly
655 	 * a replacement made by the interactive rename mapping is very likely
656 	 * to be correct since it applies to a single file and is an exact
657 	 * match. The regular expression replacements are a little harder to
658 	 * justify though. We claim that the symlink name is only likely
659 	 * to be replaced when it points within the file tree being moved and
660 	 * in that case it should be modified. what we really need to do is to
661 	 * call an oracle here. :)
662 	 */
663 	if (rephead != NULL) {
664 		/*
665 		 * we have replacement strings, modify the name and the link
666 		 * name if any.
667 		 */
668 		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
669 			return(res);
670 
671 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
672 		    (arcn->type == PAX_HRG)) &&
673 		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
674 			return(res);
675 	}
676 
677 	if (iflag) {
678 		/*
679 		 * perform interactive file rename, then map the link if any
680 		 */
681 		if ((res = tty_rename(arcn)) != 0)
682 			return(res);
683 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
684 		    (arcn->type == PAX_HRG))
685 			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
686 	}
687 	return(res);
688 }
689 
690 /*
691  * tty_rename()
692  *	Prompt the user for a replacement file name. A "." keeps the old name,
693  *	a empty line skips the file, and an EOF on reading the tty, will cause
694  *	pax to stop processing and exit. Otherwise the file name input, replaces
695  *	the old one.
696  * Return:
697  *	0 process this file, 1 skip this file, -1 we need to exit pax
698  */
699 
700 static int
701 tty_rename(ARCHD *arcn)
702 {
703 	char tmpname[PAXPATHLEN+2];
704 	int res;
705 
706 	/*
707 	 * prompt user for the replacement name for a file, keep trying until
708 	 * we get some reasonable input. Archives may have more than one file
709 	 * on them with the same name (from updates etc). We print verbose info
710 	 * on the file so the user knows what is up.
711 	 */
712 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
713 
714 	for (;;) {
715 		ls_tty(arcn);
716 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
717 		tty_prnt("or a \"return\" to skip this file.\n");
718 		tty_prnt("Input > ");
719 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
720 			return(-1);
721 		if (strcmp(tmpname, "..") == 0) {
722 			tty_prnt("Try again, illegal file name: ..\n");
723 			continue;
724 		}
725 		if (strlen(tmpname) > PAXPATHLEN) {
726 			tty_prnt("Try again, file name too long\n");
727 			continue;
728 		}
729 		break;
730 	}
731 
732 	/*
733 	 * empty file name, skips this file. a "." leaves it alone
734 	 */
735 	if (tmpname[0] == '\0') {
736 		tty_prnt("Skipping file.\n");
737 		return(1);
738 	}
739 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
740 		tty_prnt("Processing continues, name unchanged.\n");
741 		return(0);
742 	}
743 
744 	/*
745 	 * ok the name changed. We may run into links that point at this
746 	 * file later. we have to remember where the user sent the file
747 	 * in order to repair any links.
748 	 */
749 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
750 	res = add_name(arcn->name, arcn->nlen, tmpname);
751 	arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
752 	arcn->name[arcn->nlen] = '\0';
753 	if (res < 0)
754 		return(-1);
755 	return(0);
756 }
757 
758 /*
759  * set_dest()
760  *	fix up the file name and the link name (if any) so this file will land
761  *	in the destination directory (used during copy() -rw).
762  * Return:
763  *	0 if ok, -1 if failure (name too long)
764  */
765 
766 int
767 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
768 {
769 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
770 		return(-1);
771 
772 	/*
773 	 * It is really hard to deal with symlinks here, we cannot be sure
774 	 * if the name they point was moved (or will be moved). It is best to
775 	 * leave them alone.
776 	 */
777 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
778 		return(0);
779 
780 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
781 		return(-1);
782 	return(0);
783 }
784 
785 /*
786  * fix_path
787  *	concatenate dir_name and or_name and store the result in or_name (if
788  *	it fits). This is one ugly function.
789  * Return:
790  *	0 if ok, -1 if the final name is too long
791  */
792 
793 static int
794 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
795 {
796 	char *src;
797 	char *dest;
798 	char *start;
799 	int len;
800 
801 	/*
802 	 * we shift the or_name to the right enough to tack in the dir_name
803 	 * at the front. We make sure we have enough space for it all before
804 	 * we start. since dest always ends in a slash, we skip of or_name
805 	 * if it also starts with one.
806 	 */
807 	start = or_name;
808 	src = start + *or_len;
809 	dest = src + dir_len;
810 	if (*start == '/') {
811 		++start;
812 		--dest;
813 	}
814 	if ((len = dest - or_name) > PAXPATHLEN) {
815 		paxwarn(1, "File name %s/%s, too long", dir_name, start);
816 		return(-1);
817 	}
818 	*or_len = len;
819 
820 	/*
821 	 * enough space, shift
822 	 */
823 	while (src >= start)
824 		*dest-- = *src--;
825 	src = dir_name + dir_len - 1;
826 
827 	/*
828 	 * splice in the destination directory name
829 	 */
830 	while (src >= dir_name)
831 		*dest-- = *src--;
832 
833 	*(or_name + len) = '\0';
834 	return(0);
835 }
836 
837 /*
838  * rep_name()
839  *	walk down the list of replacement strings applying each one in order.
840  *	when we find one with a successful substitution, we modify the name
841  *	as specified. if required, we print the results. if the resulting name
842  *	is empty, we will skip this archive member. We use the regexp(3)
843  *	routines (regexp() ought to win a prize as having the most cryptic
844  *	library function manual page).
845  *	--Parameters--
846  *	name is the file name we are going to apply the regular expressions to
847  *	(and may be modified)
848  *	nlen is the length of this name (and is modified to hold the length of
849  *	the final string).
850  *	prnt is a flag that says whether to print the final result.
851  * Return:
852  *	0 if substitution was successful, 1 if we are to skip the file (the name
853  *	ended up empty)
854  */
855 
856 static int
857 rep_name(char *name, int *nlen, int prnt)
858 {
859 	REPLACE *pt;
860 	char *inpt;
861 	char *outpt;
862 	char *endpt;
863 	char *rpt;
864 	int found = 0;
865 	int res;
866 #	ifndef NET2_REGEX
867 	regmatch_t pm[MAXSUBEXP];
868 #	endif
869 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
870 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
871 
872 	/*
873 	 * copy the name into buf1, where we will work on it. We need to keep
874 	 * the orig string around so we can print out the result of the final
875 	 * replacement. We build up the final result in nname. inpt points at
876 	 * the string we apply the regular expression to. prnt is used to
877 	 * suppress printing when we handle replacements on the link field
878 	 * (the user already saw that substitution go by)
879 	 */
880 	pt = rephead;
881 	(void)strlcpy(buf1, name, sizeof(buf1));
882 	inpt = buf1;
883 	outpt = nname;
884 	endpt = outpt + PAXPATHLEN;
885 
886 	/*
887 	 * try each replacement string in order
888 	 */
889 	while (pt != NULL) {
890 		do {
891 			/*
892 			 * check for a successful substitution, if not go to
893 			 * the next pattern, or cleanup if we were global
894 			 */
895 #			ifdef NET2_REGEX
896 			if (regexec(pt->rcmp, inpt) == 0)
897 #			else
898 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
899 #			endif
900 				break;
901 
902 			/*
903 			 * ok we found one. We have three parts, the prefix
904 			 * which did not match, the section that did and the
905 			 * tail (that also did not match). Copy the prefix to
906 			 * the final output buffer (watching to make sure we
907 			 * do not create a string too long).
908 			 */
909 			found = 1;
910 #			ifdef NET2_REGEX
911 			rpt = pt->rcmp->startp[0];
912 #			else
913 			rpt = inpt + pm[0].rm_so;
914 #			endif
915 
916 			while ((inpt < rpt) && (outpt < endpt))
917 				*outpt++ = *inpt++;
918 			if (outpt == endpt)
919 				break;
920 
921 			/*
922 			 * for the second part (which matched the regular
923 			 * expression) apply the substitution using the
924 			 * replacement string and place it the prefix in the
925 			 * final output. If we have problems, skip it.
926 			 */
927 #			ifdef NET2_REGEX
928 			if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
929 #			else
930 			if ((res = resub(&(pt->rcmp),pm,inpt,pt->nstr,outpt,endpt))
931 			    < 0) {
932 #			endif
933 				if (prnt)
934 					paxwarn(1, "Replacement name error %s",
935 					    name);
936 				return(1);
937 			}
938 			outpt += res;
939 
940 			/*
941 			 * we set up to look again starting at the first
942 			 * character in the tail (of the input string right
943 			 * after the last character matched by the regular
944 			 * expression (inpt always points at the first char in
945 			 * the string to process). If we are not doing a global
946 			 * substitution, we will use inpt to copy the tail to
947 			 * the final result. Make sure we do not overrun the
948 			 * output buffer
949 			 */
950 #			ifdef NET2_REGEX
951 			inpt = pt->rcmp->endp[0];
952 #			else
953 			inpt += pm[0].rm_eo - pm[0].rm_so;
954 #			endif
955 
956 			if ((outpt == endpt) || (*inpt == '\0'))
957 				break;
958 
959 			/*
960 			 * if the user wants global we keep trying to
961 			 * substitute until it fails, then we are done.
962 			 */
963 		} while (pt->flgs & GLOB);
964 
965 		if (found)
966 			break;
967 
968 		/*
969 		 * a successful substitution did NOT occur, try the next one
970 		 */
971 		pt = pt->fow;
972 	}
973 
974 	if (found) {
975 		/*
976 		 * we had a substitution, copy the last tail piece (if there is
977 		 * room) to the final result
978 		 */
979 		while ((outpt < endpt) && (*inpt != '\0'))
980 			*outpt++ = *inpt++;
981 
982 		*outpt = '\0';
983 		if ((outpt == endpt) && (*inpt != '\0')) {
984 			if (prnt)
985 				paxwarn(1,"Replacement name too long %s >> %s",
986 				    name, nname);
987 			return(1);
988 		}
989 
990 		/*
991 		 * inform the user of the result if wanted
992 		 */
993 		if (prnt && (pt->flgs & PRNT)) {
994 			if (*nname == '\0')
995 				(void)fprintf(stderr,"%s >> <empty string>\n",
996 				    name);
997 			else
998 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
999 		}
1000 
1001 		/*
1002 		 * if empty inform the caller this file is to be skipped
1003 		 * otherwise copy the new name over the orig name and return
1004 		 */
1005 		if (*nname == '\0')
1006 			return(1);
1007 		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1008 		name[PAXPATHLEN] = '\0';
1009 	}
1010 	return(0);
1011 }
1012 
1013 #ifdef NET2_REGEX
1014 /*
1015  * resub()
1016  *	apply the replacement to the matched expression. expand out the old
1017  * 	style ed(1) subexpression expansion.
1018  * Return:
1019  *	-1 if error, or the number of characters added to the destination.
1020  */
1021 
1022 static int
1023 resub(regexp *prog, char *src, char *dest, char *destend)
1024 {
1025 	char *spt;
1026 	char *dpt;
1027 	char c;
1028 	int no;
1029 	int len;
1030 
1031 	spt = src;
1032 	dpt = dest;
1033 	while ((dpt < destend) && ((c = *spt++) != '\0')) {
1034 		if (c == '&')
1035 			no = 0;
1036 		else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1037 			no = *spt++ - '0';
1038 		else {
1039  			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1040  				c = *spt++;
1041  			*dpt++ = c;
1042 			continue;
1043 		}
1044  		if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1045 		    ((len = prog->endp[no] - prog->startp[no]) <= 0))
1046 			continue;
1047 
1048 		/*
1049 		 * copy the subexpression to the destination.
1050 		 * fail if we run out of space or the match string is damaged
1051 		 */
1052 		if (len > (destend - dpt))
1053 			len = destend - dpt;
1054 		if (l_strncpy(dpt, prog->startp[no], len) != len)
1055 			return(-1);
1056 		dpt += len;
1057 	}
1058 	return(dpt - dest);
1059 }
1060 
1061 #else
1062 
/*
 * resub()
 *	expand the replacement string src into dest for the match described
 *	by pm. An '&' stands for the whole match (pm[0]) and \0 .. \9 for
 *	the corresponding entry of pm; "\\" and "\&" produce a literal
 *	backslash and ampersand. The text of a (sub)match is taken from
 *	orig + rm_so. Output stops once destend is reached.
 * Return:
 *	-1 if error (this includes a reference to a subexpression number
 *	larger than rp->re_nsub), or the number of characters added to the
 *	destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *orig, char *src, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	regmatch_t *m;
	char ch;
	int nsub;
	int idx;
	int cnt;

	nsub = rp->re_nsub;
	for (;;) {
		if (out >= destend)
			break;
		ch = *in++;
		if (ch == '\0')
			break;

		/*
		 * decide whether this is a reference to (part of) the match
		 * or just an ordinary replacement character
		 */
		if (ch == '&') {
			m = &pm[0];
		} else if ((ch == '\\') && (*in >= '0') && (*in <= '9')) {
			/*
			 * the expression must have that many subexpressions
			 */
			idx = *in++ - '0';
			if (idx > nsub)
				return(-1);
			m = &pm[idx];
		} else {
			/*
			 * ordinary character; a backslash in front of a
			 * backslash or an ampersand is dropped
			 */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to insert for an unset or empty subexpression
		 */
		if ((m->rm_so < 0) || (m->rm_eo < 0))
			continue;
		cnt = m->rm_eo - m->rm_so;
		if (cnt <= 0)
			continue;

		/*
		 * copy the subexpression to the destination, clipped to the
		 * space left. fail if the match string is damaged
		 */
		if (cnt > (destend - out))
			cnt = destend - out;
		if (l_strncpy(out, orig + m->rm_so, cnt) != cnt)
			return(-1);
		out += cnt;
	}
	return(out - dest);
}
1128 #endif
1129