xref: /freebsd/contrib/libarchive/tar/subst.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008 Joerg Sonnenberger
5  * All rights reserved.
6  */
7 
8 #include "bsdtar_platform.h"
9 
10 #if defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H) || defined(HAVE_PCRE2POSIX_H)
11 #include "bsdtar.h"
12 
13 #include <errno.h>
14 #if defined(HAVE_PCREPOSIX_H)
15 #include <pcreposix.h>
16 #elif defined(HAVE_PCRE2POSIX_H)
17 #include <pcre2posix.h>
18 #else
19 #include <regex.h>
20 #endif
21 #include <stdlib.h>
22 #include <string.h>
23 
24 #ifndef REG_BASIC
25 #define	REG_BASIC 0
26 #endif
27 
28 #include "err.h"
29 
/*
 * One parsed substitution rule: the compiled pattern, the replacement
 * template and the flags chosen by the letters that trail the rule.
 * Rules form a singly linked list through `next`.
 */
struct subst_rule {
	struct subst_rule *next;	/* Following rule, NULL at the tail. */
	regex_t re;			/* Pattern as compiled by regcomp(). */
	char *result;			/* Replacement template (heap string). */
	unsigned int global:1;		/* Keep replacing after the first match. */
	unsigned int print:1;		/* Report the rewritten name on stderr. */
	unsigned int regular:1;		/* Rule applies to regular filenames. */
	unsigned int symlink:1;		/* Rule applies to symlink targets. */
	unsigned int hardlink:1;	/* Rule applies to hardlink targets. */
	unsigned int from_begin:1;	/* Match against the whole rewritten name. */
};
36 
/* Ordered list of substitution rules hanging off a bsdtar instance. */
struct substitution {
	struct subst_rule *first_rule;	/* Head of the list; NULL when empty. */
	struct subst_rule *last_rule;	/* Tail of the list; new rules go after it. */
};
40 
41 static void
42 init_substitution(struct bsdtar *bsdtar)
43 {
44 	struct substitution *subst;
45 
46 	bsdtar->substitution = subst = malloc(sizeof(*subst));
47 	if (subst == NULL)
48 		lafe_errc(1, errno, "Out of memory");
49 	subst->first_rule = subst->last_rule = NULL;
50 }
51 
52 void
53 add_substitution(struct bsdtar *bsdtar, const char *rule_text)
54 {
55 	struct subst_rule *rule;
56 	struct substitution *subst;
57 	const char *end_pattern, *start_subst;
58 	char *pattern;
59 	int r;
60 
61 	if ((subst = bsdtar->substitution) == NULL) {
62 		init_substitution(bsdtar);
63 		subst = bsdtar->substitution;
64 	}
65 
66 	rule = malloc(sizeof(*rule));
67 	if (rule == NULL)
68 		lafe_errc(1, errno, "Out of memory");
69 	rule->next = NULL;
70 	rule->result = NULL;
71 
72 	if (subst->last_rule == NULL)
73 		subst->first_rule = rule;
74 	else
75 		subst->last_rule->next = rule;
76 	subst->last_rule = rule;
77 
78 	if (*rule_text == '\0')
79 		lafe_errc(1, 0, "Empty replacement string");
80 	end_pattern = strchr(rule_text + 1, *rule_text);
81 	if (end_pattern == NULL)
82 		lafe_errc(1, 0, "Invalid replacement string");
83 
84 	pattern = malloc(end_pattern - rule_text);
85 	if (pattern == NULL)
86 		lafe_errc(1, errno, "Out of memory");
87 	memcpy(pattern, rule_text + 1, end_pattern - rule_text - 1);
88 	pattern[end_pattern - rule_text - 1] = '\0';
89 
90 	if ((r = regcomp(&rule->re, pattern, REG_BASIC)) != 0) {
91 		char buf[80];
92 		regerror(r, &rule->re, buf, sizeof(buf));
93 		lafe_errc(1, 0, "Invalid regular expression: %s", buf);
94 	}
95 	free(pattern);
96 
97 	start_subst = end_pattern + 1;
98 	end_pattern = strchr(start_subst, *rule_text);
99 	if (end_pattern == NULL)
100 		lafe_errc(1, 0, "Invalid replacement string");
101 
102 	rule->result = malloc(end_pattern - start_subst + 1);
103 	if (rule->result == NULL)
104 		lafe_errc(1, errno, "Out of memory");
105 	memcpy(rule->result, start_subst, end_pattern - start_subst);
106 	rule->result[end_pattern - start_subst] = '\0';
107 
108 	/* Defaults */
109 	rule->global = 0; /* Don't do multiple replacements. */
110 	rule->print = 0; /* Don't print. */
111 	rule->regular = 1; /* Rewrite regular filenames. */
112 	rule->symlink = 1; /* Rewrite symlink targets. */
113 	rule->hardlink = 1; /* Rewrite hardlink targets. */
114 	rule->from_begin = 0; /* Don't match from start. */
115 
116 	while (*++end_pattern) {
117 		switch (*end_pattern) {
118 		case 'b':
119 		case 'B':
120 			rule->from_begin = 1;
121 			break;
122 		case 'g':
123 		case 'G':
124 			rule->global = 1;
125 			break;
126 		case 'h':
127 			rule->hardlink = 1;
128 			break;
129 		case 'H':
130 			rule->hardlink = 0;
131 			break;
132 		case 'p':
133 		case 'P':
134 			rule->print = 1;
135 			break;
136 		case 'r':
137 			rule->regular = 1;
138 			break;
139 		case 'R':
140 			rule->regular = 0;
141 			break;
142 		case 's':
143 			rule->symlink = 1;
144 			break;
145 		case 'S':
146 			rule->symlink = 0;
147 			break;
148 		default:
149 			lafe_errc(1, 0, "Invalid replacement flag %c", *end_pattern);
150 			/* NOTREACHED */
151 		}
152 	}
153 }
154 
/*
 * Append the first len bytes of append to the heap string *str.
 * *str may be NULL, which is treated as an empty string. The combined,
 * NUL-terminated text is built in a freshly allocated buffer; the old
 * buffer is then freed and *str is pointed at the new one.
 * An allocation failure is reported through lafe_errc().
 */
static void
realloc_strncat(char **str, const char *append, size_t len)
{
	char *prev = *str;
	size_t prev_len = (prev != NULL) ? strlen(prev) : 0;
	char *joined = malloc(prev_len + len + 1);

	if (joined == NULL)
		lafe_errc(1, errno, "Out of memory");
	if (prev != NULL)
		memcpy(joined, prev, prev_len);
	memcpy(joined + prev_len, append, len);
	joined[prev_len + len] = '\0';

	free(prev);
	*str = joined;
}
176 
/*
 * Append the whole NUL-terminated string append to the heap string *str.
 * *str may be NULL, which is treated as an empty string. The combined
 * text is built in a freshly allocated buffer; the old buffer is then
 * freed and *str is pointed at the new one.
 * An allocation failure is reported through lafe_errc().
 */
static void
realloc_strcat(char **str, const char *append)
{
	char *prev = *str;
	size_t prev_len = (prev != NULL) ? strlen(prev) : 0;
	size_t add_len = strlen(append);
	char *joined = malloc(prev_len + add_len + 1);

	if (joined == NULL)
		lafe_errc(1, errno, "Out of memory");
	if (prev != NULL)
		memcpy(joined, prev, prev_len);
	/* The + 1 carries the terminating NUL of append across. */
	memcpy(joined + prev_len, append, add_len + 1);

	free(prev);
	*str = joined;
}
197 
198 int
199 apply_substitution(struct bsdtar *bsdtar, const char *name, char **result,
200     int symlink_target, int hardlink_target)
201 {
202 	const char *path = name;
203 	regmatch_t matches[10];
204 	char* buffer = NULL;
205 	size_t i, j;
206 	struct subst_rule *rule;
207 	struct substitution *subst;
208 	int c, got_match, print_match;
209 
210 	*result = NULL;
211 
212 	if ((subst = bsdtar->substitution) == NULL)
213 		return 0;
214 
215 	got_match = 0;
216 	print_match = 0;
217 
218 	for (rule = subst->first_rule; rule != NULL; rule = rule->next) {
219 		if (symlink_target) {
220 			if (!rule->symlink)
221 				continue;
222 		} else if (hardlink_target) {
223 			if (!rule->hardlink)
224 				continue;
225 		} else { /* Regular filename. */
226 			if (!rule->regular)
227 				continue;
228 		}
229 
230 		if (rule->from_begin && *result) {
231 			realloc_strcat(result, name);
232 			realloc_strcat(&buffer, *result);
233 			name = buffer;
234 			(*result)[0] = 0;
235 		}
236 
237 		while (1) {
238 			if (regexec(&rule->re, name, 10, matches, 0))
239 				break;
240 
241 			got_match = 1;
242 			print_match |= rule->print;
243 			realloc_strncat(result, name, matches[0].rm_so);
244 
245 			for (i = 0, j = 0; rule->result[i] != '\0'; ++i) {
246 				if (rule->result[i] == '~') {
247 					realloc_strncat(result, rule->result + j, i - j);
248 					realloc_strncat(result,
249 					    name + matches[0].rm_so,
250 					    matches[0].rm_eo - matches[0].rm_so);
251 					j = i + 1;
252 					continue;
253 				}
254 				if (rule->result[i] != '\\')
255 					continue;
256 
257 				++i;
258 				c = rule->result[i];
259 				switch (c) {
260 				case '~':
261 				case '\\':
262 					realloc_strncat(result, rule->result + j, i - j - 1);
263 					j = i;
264 					break;
265 				case '1':
266 				case '2':
267 				case '3':
268 				case '4':
269 				case '5':
270 				case '6':
271 				case '7':
272 				case '8':
273 				case '9':
274 					realloc_strncat(result, rule->result + j, i - j - 1);
275 					if ((size_t)(c - '0') > (size_t)(rule->re.re_nsub)) {
276 						free(buffer);
277 						free(*result);
278 						*result = NULL;
279 						return -1;
280 					}
281 					realloc_strncat(result, name + matches[c - '0'].rm_so, matches[c - '0'].rm_eo - matches[c - '0'].rm_so);
282 					j = i + 1;
283 					break;
284 				default:
285 					/* Just continue; */
286 					break;
287 				}
288 
289 			}
290 
291 			realloc_strcat(result, rule->result + j);
292 
293 			name += matches[0].rm_eo;
294 
295 			if (!rule->global)
296 				break;
297 		}
298 	}
299 
300 	if (got_match)
301 		realloc_strcat(result, name);
302 
303 	free(buffer);
304 
305 	if (print_match)
306 		fprintf(stderr, "%s >> %s\n", path, *result);
307 
308 	return got_match;
309 }
310 
311 void
312 cleanup_substitution(struct bsdtar *bsdtar)
313 {
314 	struct subst_rule *rule;
315 	struct substitution *subst;
316 
317 	if ((subst = bsdtar->substitution) == NULL)
318 		return;
319 
320 	while ((rule = subst->first_rule) != NULL) {
321 		subst->first_rule = rule->next;
322 		free(rule->result);
323 		regfree(&rule->re);
324 		free(rule);
325 	}
326 	free(subst);
327 }
328 #endif /* defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H) || defined(HAVE_PCRE2POSIX_H) */
329