xref: /freebsd/bin/pax/pat_rep.c (revision bdcbfde31e8e9b343f113a1956384bdf30d1ed62)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 #endif /* not lint */
38 #include <sys/cdefs.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <regex.h>
45 #include "pax.h"
46 #include "pat_rep.h"
47 #include "extern.h"
48 
49 /*
50  * routines to handle pattern matching, name modification (regular expression
51  * substitution and interactive renames), and destination name modification for
52  * copy (-rw). Both file name and link names are adjusted as required in these
53  * routines.
54  */
55 
/*
 * Number of regmatch_t slots handed to regexec() by rep_name(). Slot 0 is
 * the whole match; resub() indexes the array with the single decimal digit
 * of a \0 - \9 reference, so the array must have exactly ten entries.
 */
#define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
/*
 * Singly linked lists (chained through ->fow). Nodes are appended at the
 * tail by pat_add() and rep_add().
 */
static PATTERN *pathead = NULL;		/* file pattern match list head */
static PATTERN *pattail = NULL;		/* file pattern match list tail */
static REPLACE *rephead = NULL;		/* replacement string list head */
static REPLACE *reptail = NULL;		/* replacement string list tail */

/*
 * helpers private to this file
 */
static int rep_name(char *, int *, int);	/* apply -s replacements */
static int tty_rename(ARCHD *);			/* interactive rename */
static int fix_path(char *, int *, char *, int);	/* prepend a directory */
static int fn_match(char *, char *, char **);	/* glob style match */
static char * range_match(char *, int);		/* [...] part of fn_match */
static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
68 
69 /*
70  * rep_add()
71  *	parses the -s replacement string; compiles the regular expression
72  *	and stores the compiled value and it's replacement string together in
73  *	replacement string list. Input to this function is of the form:
74  *		/old/new/pg
75  *	The first char in the string specifies the delimiter used by this
76  *	replacement string. "Old" is a regular expression in "ed" format which
77  *	is compiled by regcomp() and is applied to filenames. "new" is the
78  *	substitution string; p and g are options flags for printing and global
79  *	replacement (over the single filename)
80  * Return:
81  *	0 if a proper replacement string and regular expression was added to
82  *	the list of replacement patterns; -1 otherwise.
83  */
84 
85 int
86 rep_add(char *str)
87 {
88 	char *pt1;
89 	char *pt2;
90 	REPLACE *rep;
91 	int res;
92 	char rebuf[BUFSIZ];
93 
94 	/*
95 	 * throw out the bad parameters
96 	 */
97 	if ((str == NULL) || (*str == '\0')) {
98 		paxwarn(1, "Empty replacement string");
99 		return(-1);
100 	}
101 
102 	/*
103 	 * first character in the string specifies what the delimiter is for
104 	 * this expression
105 	 */
106 	if ((pt1 = strchr(str+1, *str)) == NULL) {
107 		paxwarn(1, "Invalid replacement string %s", str);
108 		return(-1);
109 	}
110 
111 	/*
112 	 * allocate space for the node that handles this replacement pattern
113 	 * and split out the regular expression and try to compile it
114 	 */
115 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
116 		paxwarn(1, "Unable to allocate memory for replacement string");
117 		return(-1);
118 	}
119 
120 	*pt1 = '\0';
121 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
122 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
123 		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
124 		free(rep);
125 		return(-1);
126 	}
127 
128 	/*
129 	 * put the delimiter back in case we need an error message and
130 	 * locate the delimiter at the end of the replacement string
131 	 * we then point the node at the new substitution string
132 	 */
133 	*pt1++ = *str;
134 	if ((pt2 = strchr(pt1, *str)) == NULL) {
135 		regfree(&rep->rcmp);
136 		free(rep);
137 		paxwarn(1, "Invalid replacement string %s", str);
138 		return(-1);
139 	}
140 
141 	*pt2 = '\0';
142 	rep->nstr = pt1;
143 	pt1 = pt2++;
144 	rep->flgs = 0;
145 
146 	/*
147 	 * set the options if any
148 	 */
149 	while (*pt2 != '\0') {
150 		switch(*pt2) {
151 		case 'g':
152 		case 'G':
153 			rep->flgs  |= GLOB;
154 			break;
155 		case 'p':
156 		case 'P':
157 			rep->flgs  |= PRNT;
158 			break;
159 		default:
160 			regfree(&rep->rcmp);
161 			free(rep);
162 			*pt1 = *str;
163 			paxwarn(1, "Invalid replacement string option %s", str);
164 			return(-1);
165 		}
166 		++pt2;
167 	}
168 
169 	/*
170 	 * all done, link it in at the end
171 	 */
172 	rep->fow = NULL;
173 	if (rephead == NULL) {
174 		reptail = rephead = rep;
175 		return(0);
176 	}
177 	reptail->fow = rep;
178 	reptail = rep;
179 	return(0);
180 }
181 
182 /*
183  * pat_add()
184  *	add a pattern match to the pattern match list. Pattern matches are used
185  *	to select which archive members are extracted. (They appear as
186  *	arguments to pax in the list and read modes). If no patterns are
187  *	supplied to pax, all members in the archive will be selected (and the
188  *	pattern match list is empty).
189  * Return:
190  *	0 if the pattern was added to the list, -1 otherwise
191  */
192 
193 int
194 pat_add(char *str, char *chdnam)
195 {
196 	PATTERN *pt;
197 
198 	/*
199 	 * throw out the junk
200 	 */
201 	if ((str == NULL) || (*str == '\0')) {
202 		paxwarn(1, "Empty pattern string");
203 		return(-1);
204 	}
205 
206 	/*
207 	 * allocate space for the pattern and store the pattern. the pattern is
208 	 * part of argv so do not bother to copy it, just point at it. Add the
209 	 * node to the end of the pattern list
210 	 */
211 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
212 		paxwarn(1, "Unable to allocate memory for pattern string");
213 		return(-1);
214 	}
215 
216 	pt->pstr = str;
217 	pt->pend = NULL;
218 	pt->plen = strlen(str);
219 	pt->fow = NULL;
220 	pt->flgs = 0;
221 	pt->chdname = chdnam;
222 
223 	if (pathead == NULL) {
224 		pattail = pathead = pt;
225 		return(0);
226 	}
227 	pattail->fow = pt;
228 	pattail = pt;
229 	return(0);
230 }
231 
232 /*
233  * pat_chk()
234  *	complain if any the user supplied pattern did not result in a match to
235  *	a selected archive member.
236  */
237 
238 void
239 pat_chk(void)
240 {
241 	PATTERN *pt;
242 	int wban = 0;
243 
244 	/*
245 	 * walk down the list checking the flags to make sure MTCH was set,
246 	 * if not complain
247 	 */
248 	for (pt = pathead; pt != NULL; pt = pt->fow) {
249 		if (pt->flgs & MTCH)
250 			continue;
251 		if (!wban) {
252 			paxwarn(1, "WARNING! These patterns were not matched:");
253 			++wban;
254 		}
255 		(void)fprintf(stderr, "%s\n", pt->pstr);
256 	}
257 }
258 
259 /*
260  * pat_sel()
261  *	the archive member which matches a pattern was selected. Mark the
262  *	pattern as having selected an archive member. arcn->pat points at the
263  *	pattern that was matched. arcn->pat is set in pat_match()
264  *
265  *	NOTE: When the -c option is used, we are called when there was no match
266  *	by pat_match() (that means we did match before the inverted sense of
267  *	the logic). Now this seems really strange at first, but with -c  we
268  *	need to keep track of those patterns that cause an archive member to NOT
269  *	be selected (it found an archive member with a specified pattern)
270  * Return:
271  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
272  *	match, -1 otherwise.
273  */
274 
275 int
276 pat_sel(ARCHD *arcn)
277 {
278 	PATTERN *pt;
279 	PATTERN **ppt;
280 	int len;
281 
282 	/*
283 	 * if no patterns just return
284 	 */
285 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
286 		return(0);
287 
288 	/*
289 	 * when we are NOT limited to a single match per pattern mark the
290 	 * pattern and return
291 	 */
292 	if (!nflag) {
293 		pt->flgs |= MTCH;
294 		return(0);
295 	}
296 
297 	/*
298 	 * we reach this point only when we allow a single selected match per
299 	 * pattern, if the pattern matches a directory and we do not have -d
300 	 * (dflag) we are done with this pattern. We may also be handed a file
301 	 * in the subtree of a directory. in that case when we are operating
302 	 * with -d, this pattern was already selected and we are done
303 	 */
304 	if (pt->flgs & DIR_MTCH)
305 		return(0);
306 
307 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
308 		/*
309 		 * ok we matched a directory and we are allowing
310 		 * subtree matches but because of the -n only its children will
311 		 * match. This is tagged as a DIR_MTCH type.
312 		 * WATCH IT, the code assumes that pt->pend points
313 		 * into arcn->name and arcn->name has not been modified.
314 		 * If not we will have a big mess. Yup this is another kludge
315 		 */
316 
317 		/*
318 		 * if this was a prefix match, remove trailing part of path
319 		 * so we can copy it. Future matches will be exact prefix match
320 		 */
321 		if (pt->pend != NULL)
322 			*pt->pend = '\0';
323 
324 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
325 			paxwarn(1, "Pattern select out of memory");
326 			if (pt->pend != NULL)
327 				*pt->pend = '/';
328 			pt->pend = NULL;
329 			return(-1);
330 		}
331 
332 		/*
333 		 * put the trailing / back in the source string
334 		 */
335 		if (pt->pend != NULL) {
336 			*pt->pend = '/';
337 			pt->pend = NULL;
338 		}
339 		pt->plen = strlen(pt->pstr);
340 
341 		/*
342 		 * strip off any trailing /, this should really never happen
343 		 */
344 		len = pt->plen - 1;
345 		if (*(pt->pstr + len) == '/') {
346 			*(pt->pstr + len) = '\0';
347 			pt->plen = len;
348 		}
349 		pt->flgs = DIR_MTCH | MTCH;
350 		arcn->pat = pt;
351 		return(0);
352 	}
353 
354 	/*
355 	 * we are then done with this pattern, so we delete it from the list
356 	 * because it can never be used for another match.
357 	 * Seems kind of strange to do for a -c, but the pax spec is really
358 	 * vague on the interaction of -c -n and -d. We assume that when -c
359 	 * and the pattern rejects a member (i.e. it matched it) it is done.
360 	 * In effect we place the order of the flags as having -c last.
361 	 */
362 	pt = pathead;
363 	ppt = &pathead;
364 	while ((pt != NULL) && (pt != arcn->pat)) {
365 		ppt = &(pt->fow);
366 		pt = pt->fow;
367 	}
368 
369 	if (pt == NULL) {
370 		/*
371 		 * should never happen....
372 		 */
373 		paxwarn(1, "Pattern list inconsistent");
374 		return(-1);
375 	}
376 	*ppt = pt->fow;
377 	free(pt);
378 	arcn->pat = NULL;
379 	return(0);
380 }
381 
382 /*
383  * pat_match()
384  *	see if this archive member matches any supplied pattern, if a match
385  *	is found, arcn->pat is set to point at the potential pattern. Later if
386  *	this archive member is "selected" we process and mark the pattern as
387  *	one which matched a selected archive member (see pat_sel())
388  * Return:
389  *	0 if this archive member should be processed, 1 if it should be
390  *	skipped and -1 if we are done with all patterns (and pax should quit
391  *	looking for more members)
392  */
393 
394 int
395 pat_match(ARCHD *arcn)
396 {
397 	PATTERN *pt;
398 
399 	arcn->pat = NULL;
400 
401 	/*
402 	 * if there are no more patterns and we have -n (and not -c) we are
403 	 * done. otherwise with no patterns to match, matches all
404 	 */
405 	if (pathead == NULL) {
406 		if (nflag && !cflag)
407 			return(-1);
408 		return(0);
409 	}
410 
411 	/*
412 	 * have to search down the list one at a time looking for a match.
413 	 */
414 	pt = pathead;
415 	while (pt != NULL) {
416 		/*
417 		 * check for a file name match unless we have DIR_MTCH set in
418 		 * this pattern then we want a prefix match
419 		 */
420 		if (pt->flgs & DIR_MTCH) {
421 			/*
422 			 * this pattern was matched before to a directory
423 			 * as we must have -n set for this (but not -d). We can
424 			 * only match CHILDREN of that directory so we must use
425 			 * an exact prefix match (no wildcards).
426 			 */
427 			if ((arcn->name[pt->plen] == '/') &&
428 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
429 				break;
430 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
431 			break;
432 		pt = pt->fow;
433 	}
434 
435 	/*
436 	 * return the result, remember that cflag (-c) inverts the sense of a
437 	 * match
438 	 */
439 	if (pt == NULL)
440 		return(cflag ? 0 : 1);
441 
442 	/*
443 	 * We had a match, now when we invert the sense (-c) we reject this
444 	 * member. However we have to tag the pattern a being successful, (in a
445 	 * match, not in selecting an archive member) so we call pat_sel() here.
446 	 */
447 	arcn->pat = pt;
448 	if (!cflag)
449 		return(0);
450 
451 	if (pat_sel(arcn) < 0)
452 		return(-1);
453 	arcn->pat = NULL;
454 	return(1);
455 }
456 
457 /*
458  * fn_match()
459  * Return:
460  *	0 if this archive member should be processed, 1 if it should be
461  *	skipped and -1 if we are done with all patterns (and pax should quit
462  *	looking for more members)
463  *	Note: *pend may be changed to show where the prefix ends.
464  */
465 
466 static int
467 fn_match(char *pattern, char *string, char **pend)
468 {
469 	char c;
470 	char test;
471 
472 	*pend = NULL;
473 	for (;;) {
474 		switch (c = *pattern++) {
475 		case '\0':
476 			/*
477 			 * Ok we found an exact match
478 			 */
479 			if (*string == '\0')
480 				return(0);
481 
482 			/*
483 			 * Check if it is a prefix match
484 			 */
485 			if ((dflag == 1) || (*string != '/'))
486 				return(-1);
487 
488 			/*
489 			 * It is a prefix match, remember where the trailing
490 			 * / is located
491 			 */
492 			*pend = string;
493 			return(0);
494 		case '?':
495 			if ((test = *string++) == '\0')
496 				return (-1);
497 			break;
498 		case '*':
499 			c = *pattern;
500 			/*
501 			 * Collapse multiple *'s.
502 			 */
503 			while (c == '*')
504 				c = *++pattern;
505 
506 			/*
507 			 * Optimized hack for pattern with a * at the end
508 			 */
509 			if (c == '\0')
510 				return (0);
511 
512 			/*
513 			 * General case, use recursion.
514 			 */
515 			while ((test = *string) != '\0') {
516 				if (!fn_match(pattern, string, pend))
517 					return (0);
518 				++string;
519 			}
520 			return (-1);
521 		case '[':
522 			/*
523 			 * range match
524 			 */
525 			if (((test = *string++) == '\0') ||
526 			    ((pattern = range_match(pattern, test)) == NULL))
527 				return (-1);
528 			break;
529 		case '\\':
530 		default:
531 			if (c != *string++)
532 				return (-1);
533 			break;
534 		}
535 	}
536 	/* NOTREACHED */
537 }
538 
/*
 * range_match()
 *	evaluate the body of a bracket expression. pattern points just past
 *	the opening '['. A leading '!' inverts the result. Members are single
 *	characters or lo-hi ranges; a '-' that is followed by ']' or by the
 *	end of the pattern is an ordinary member.
 * Return:
 *	pointer to the character after the closing ']' if test is accepted,
 *	NULL if test is rejected or the closing ']' is missing.
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing bracket
		 */
		if (lo == '\0')
			return (NULL);

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
			continue;
		}

		if (lo == test)
			hit = 1;
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
567 
568 /*
569  * mod_name()
570  *	modify a selected file name. first attempt to apply replacement string
571  *	expressions, then apply interactive file rename. We apply replacement
572  *	string expressions to both filenames and file links (if we didn't the
573  *	links would point to the wrong place, and we could never be able to
574  *	move an archive that has a file link in it). When we rename files
575  *	interactively, we store that mapping (old name to user input name) so
576  *	if we spot any file links to the old file name in the future, we will
577  *	know exactly how to fix the file link.
578  * Return:
579  *	0 continue to  process file, 1 skip this file, -1 pax is finished
580  */
581 
582 int
583 mod_name(ARCHD *arcn)
584 {
585 	int res = 0;
586 
587 	/*
588 	 * Strip off leading '/' if appropriate.
589 	 * Currently, this option is only set for the tar format.
590 	 */
591 	if (rmleadslash && arcn->name[0] == '/') {
592 		if (arcn->name[1] == '\0') {
593 			arcn->name[0] = '.';
594 		} else {
595 			(void)memmove(arcn->name, &arcn->name[1],
596 			    strlen(arcn->name));
597 			arcn->nlen--;
598 		}
599 		if (rmleadslash < 2) {
600 			rmleadslash = 2;
601 			paxwarn(0, "Removing leading / from absolute path names in the archive");
602 		}
603 	}
604 	if (rmleadslash && arcn->ln_name[0] == '/' &&
605 	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
606 		if (arcn->ln_name[1] == '\0') {
607 			arcn->ln_name[0] = '.';
608 		} else {
609 			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
610 			    strlen(arcn->ln_name));
611 			arcn->ln_nlen--;
612 		}
613 		if (rmleadslash < 2) {
614 			rmleadslash = 2;
615 			paxwarn(0, "Removing leading / from absolute path names in the archive");
616 		}
617 	}
618 
619 	/*
620 	 * IMPORTANT: We have a problem. what do we do with symlinks?
621 	 * Modifying a hard link name makes sense, as we know the file it
622 	 * points at should have been seen already in the archive (and if it
623 	 * wasn't seen because of a read error or a bad archive, we lose
624 	 * anyway). But there are no such requirements for symlinks. On one
625 	 * hand the symlink that refers to a file in the archive will have to
626 	 * be modified to so it will still work at its new location in the
627 	 * file system. On the other hand a symlink that points elsewhere (and
628 	 * should continue to do so) should not be modified. There is clearly
629 	 * no perfect solution here. So we handle them like hardlinks. Clearly
630 	 * a replacement made by the interactive rename mapping is very likely
631 	 * to be correct since it applies to a single file and is an exact
632 	 * match. The regular expression replacements are a little harder to
633 	 * justify though. We claim that the symlink name is only likely
634 	 * to be replaced when it points within the file tree being moved and
635 	 * in that case it should be modified. what we really need to do is to
636 	 * call an oracle here. :)
637 	 */
638 	if (rephead != NULL) {
639 		/*
640 		 * we have replacement strings, modify the name and the link
641 		 * name if any.
642 		 */
643 		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
644 			return(res);
645 
646 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
647 		    (arcn->type == PAX_HRG)) &&
648 		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
649 			return(res);
650 	}
651 
652 	if (iflag) {
653 		/*
654 		 * perform interactive file rename, then map the link if any
655 		 */
656 		if ((res = tty_rename(arcn)) != 0)
657 			return(res);
658 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
659 		    (arcn->type == PAX_HRG))
660 			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
661 	}
662 	return(res);
663 }
664 
665 /*
666  * tty_rename()
667  *	Prompt the user for a replacement file name. A "." keeps the old name,
668  *	a empty line skips the file, and an EOF on reading the tty, will cause
669  *	pax to stop processing and exit. Otherwise the file name input, replaces
670  *	the old one.
671  * Return:
672  *	0 process this file, 1 skip this file, -1 we need to exit pax
673  */
674 
675 static int
676 tty_rename(ARCHD *arcn)
677 {
678 	char tmpname[PAXPATHLEN+2];
679 	int res;
680 
681 	/*
682 	 * prompt user for the replacement name for a file, keep trying until
683 	 * we get some reasonable input. Archives may have more than one file
684 	 * on them with the same name (from updates etc). We print verbose info
685 	 * on the file so the user knows what is up.
686 	 */
687 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
688 
689 	for (;;) {
690 		ls_tty(arcn);
691 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
692 		tty_prnt("or a \"return\" to skip this file.\n");
693 		tty_prnt("Input > ");
694 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
695 			return(-1);
696 		if (strcmp(tmpname, "..") == 0) {
697 			tty_prnt("Try again, illegal file name: ..\n");
698 			continue;
699 		}
700 		if (strlen(tmpname) > PAXPATHLEN) {
701 			tty_prnt("Try again, file name too long\n");
702 			continue;
703 		}
704 		break;
705 	}
706 
707 	/*
708 	 * empty file name, skips this file. a "." leaves it alone
709 	 */
710 	if (tmpname[0] == '\0') {
711 		tty_prnt("Skipping file.\n");
712 		return(1);
713 	}
714 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
715 		tty_prnt("Processing continues, name unchanged.\n");
716 		return(0);
717 	}
718 
719 	/*
720 	 * ok the name changed. We may run into links that point at this
721 	 * file later. we have to remember where the user sent the file
722 	 * in order to repair any links.
723 	 */
724 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
725 	res = add_name(arcn->name, arcn->nlen, tmpname);
726 	arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
727 	arcn->name[arcn->nlen] = '\0';
728 	if (res < 0)
729 		return(-1);
730 	return(0);
731 }
732 
733 /*
734  * set_dest()
735  *	fix up the file name and the link name (if any) so this file will land
736  *	in the destination directory (used during copy() -rw).
737  * Return:
738  *	0 if ok, -1 if failure (name too long)
739  */
740 
741 int
742 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
743 {
744 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
745 		return(-1);
746 
747 	/*
748 	 * It is really hard to deal with symlinks here, we cannot be sure
749 	 * if the name they point was moved (or will be moved). It is best to
750 	 * leave them alone.
751 	 */
752 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
753 		return(0);
754 
755 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
756 		return(-1);
757 	return(0);
758 }
759 
760 /*
761  * fix_path
762  *	concatenate dir_name and or_name and store the result in or_name (if
763  *	it fits). This is one ugly function.
764  * Return:
765  *	0 if ok, -1 if the final name is too long
766  */
767 
768 static int
769 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
770 {
771 	char *src;
772 	char *dest;
773 	char *start;
774 	int len;
775 
776 	/*
777 	 * we shift the or_name to the right enough to tack in the dir_name
778 	 * at the front. We make sure we have enough space for it all before
779 	 * we start. since dest always ends in a slash, we skip of or_name
780 	 * if it also starts with one.
781 	 */
782 	start = or_name;
783 	src = start + *or_len;
784 	dest = src + dir_len;
785 	if (*start == '/') {
786 		++start;
787 		--dest;
788 	}
789 	if ((len = dest - or_name) > PAXPATHLEN) {
790 		paxwarn(1, "File name %s/%s, too long", dir_name, start);
791 		return(-1);
792 	}
793 	*or_len = len;
794 
795 	/*
796 	 * enough space, shift
797 	 */
798 	while (src >= start)
799 		*dest-- = *src--;
800 	src = dir_name + dir_len - 1;
801 
802 	/*
803 	 * splice in the destination directory name
804 	 */
805 	while (src >= dir_name)
806 		*dest-- = *src--;
807 
808 	*(or_name + len) = '\0';
809 	return(0);
810 }
811 
812 /*
813  * rep_name()
814  *	walk down the list of replacement strings applying each one in order.
815  *	when we find one with a successful substitution, we modify the name
816  *	as specified. if required, we print the results. if the resulting name
817  *	is empty, we will skip this archive member. We use the regexp(3)
818  *	routines (regexp() ought to win a prize as having the most cryptic
819  *	library function manual page).
820  *	--Parameters--
821  *	name is the file name we are going to apply the regular expressions to
822  *	(and may be modified)
823  *	nlen is the length of this name (and is modified to hold the length of
824  *	the final string).
825  *	prnt is a flag that says whether to print the final result.
826  * Return:
827  *	0 if substitution was successful, 1 if we are to skip the file (the name
828  *	ended up empty)
829  */
830 
831 static int
832 rep_name(char *name, int *nlen, int prnt)
833 {
834 	REPLACE *pt;
835 	char *inpt;
836 	char *outpt;
837 	char *endpt;
838 	char *rpt;
839 	int found = 0;
840 	int res;
841 	regmatch_t pm[MAXSUBEXP];
842 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
843 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
844 
845 	/*
846 	 * copy the name into buf1, where we will work on it. We need to keep
847 	 * the orig string around so we can print out the result of the final
848 	 * replacement. We build up the final result in nname. inpt points at
849 	 * the string we apply the regular expression to. prnt is used to
850 	 * suppress printing when we handle replacements on the link field
851 	 * (the user already saw that substitution go by)
852 	 */
853 	pt = rephead;
854 	(void)strlcpy(buf1, name, sizeof(buf1));
855 	inpt = buf1;
856 	outpt = nname;
857 	endpt = outpt + PAXPATHLEN;
858 
859 	/*
860 	 * try each replacement string in order
861 	 */
862 	while (pt != NULL) {
863 		do {
864 			/*
865 			 * check for a successful substitution, if not go to
866 			 * the next pattern, or cleanup if we were global
867 			 */
868 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
869 				break;
870 
871 			/*
872 			 * ok we found one. We have three parts, the prefix
873 			 * which did not match, the section that did and the
874 			 * tail (that also did not match). Copy the prefix to
875 			 * the final output buffer (watching to make sure we
876 			 * do not create a string too long).
877 			 */
878 			found = 1;
879 			rpt = inpt + pm[0].rm_so;
880 
881 			while ((inpt < rpt) && (outpt < endpt))
882 				*outpt++ = *inpt++;
883 			if (outpt == endpt)
884 				break;
885 
886 			/*
887 			 * for the second part (which matched the regular
888 			 * expression) apply the substitution using the
889 			 * replacement string and place it the prefix in the
890 			 * final output. If we have problems, skip it.
891 			 */
892 			if ((res = resub(&(pt->rcmp),pm,inpt,pt->nstr,outpt,endpt))
893 			    < 0) {
894 				if (prnt)
895 					paxwarn(1, "Replacement name error %s",
896 					    name);
897 				return(1);
898 			}
899 			outpt += res;
900 
901 			/*
902 			 * we set up to look again starting at the first
903 			 * character in the tail (of the input string right
904 			 * after the last character matched by the regular
905 			 * expression (inpt always points at the first char in
906 			 * the string to process). If we are not doing a global
907 			 * substitution, we will use inpt to copy the tail to
908 			 * the final result. Make sure we do not overrun the
909 			 * output buffer
910 			 */
911 			inpt += pm[0].rm_eo - pm[0].rm_so;
912 
913 			if ((outpt == endpt) || (*inpt == '\0'))
914 				break;
915 
916 			/*
917 			 * if the user wants global we keep trying to
918 			 * substitute until it fails, then we are done.
919 			 */
920 		} while (pt->flgs & GLOB);
921 
922 		if (found)
923 			break;
924 
925 		/*
926 		 * a successful substitution did NOT occur, try the next one
927 		 */
928 		pt = pt->fow;
929 	}
930 
931 	if (found) {
932 		/*
933 		 * we had a substitution, copy the last tail piece (if there is
934 		 * room) to the final result
935 		 */
936 		while ((outpt < endpt) && (*inpt != '\0'))
937 			*outpt++ = *inpt++;
938 
939 		*outpt = '\0';
940 		if ((outpt == endpt) && (*inpt != '\0')) {
941 			if (prnt)
942 				paxwarn(1,"Replacement name too long %s >> %s",
943 				    name, nname);
944 			return(1);
945 		}
946 
947 		/*
948 		 * inform the user of the result if wanted
949 		 */
950 		if (prnt && (pt->flgs & PRNT)) {
951 			if (*nname == '\0')
952 				(void)fprintf(stderr,"%s >> <empty string>\n",
953 				    name);
954 			else
955 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
956 		}
957 
958 		/*
959 		 * if empty inform the caller this file is to be skipped
960 		 * otherwise copy the new name over the orig name and return
961 		 */
962 		if (*nname == '\0')
963 			return(1);
964 		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
965 		name[PAXPATHLEN] = '\0';
966 	}
967 	return(0);
968 }
969 
970 
971 /*
972  * resub()
973  *	apply the replacement to the matched expression. expand out the old
974  * 	style ed(1) subexpression expansion.
975  * Return:
976  *	-1 if error, or the number of characters added to the destination.
977  */
978 
979 static int
980 resub(regex_t *rp, regmatch_t *pm, char *orig, char *src, char *dest,
981 	char *destend)
982 {
983 	char *spt;
984 	char *dpt;
985 	char c;
986 	regmatch_t *pmpt;
987 	int len;
988 	int subexcnt;
989 
990 	spt =  src;
991 	dpt = dest;
992 	subexcnt = rp->re_nsub;
993 	while ((dpt < destend) && ((c = *spt++) != '\0')) {
994 		/*
995 		 * see if we just have an ordinary replacement character
996 		 * or we refer to a subexpression.
997 		 */
998 		if (c == '&') {
999 			pmpt = pm;
1000 		} else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
1001 			/*
1002 			 * make sure there is a subexpression as specified
1003 			 */
1004 			if ((len = *spt++ - '0') > subexcnt)
1005 				return(-1);
1006 			pmpt = pm + len;
1007 		} else {
1008  			/*
1009 			 * Ordinary character, just copy it
1010 			 */
1011  			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1012  				c = *spt++;
1013  			*dpt++ = c;
1014 			continue;
1015 		}
1016 
1017 		/*
1018 		 * continue if the subexpression is bogus
1019 		 */
1020 		if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
1021 		    ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
1022 			continue;
1023 
1024 		/*
1025 		 * copy the subexpression to the destination.
1026 		 * fail if we run out of space or the match string is damaged
1027 		 */
1028 		if (len > (destend - dpt))
1029 			len = destend - dpt;
1030 		if (l_strncpy(dpt, orig + pmpt->rm_so, len) != len)
1031 			return(-1);
1032 		dpt += len;
1033 	}
1034 	return(dpt - dest);
1035 }
1036