xref: /freebsd/contrib/libarchive/tar/subst.c (revision c1e033c33e8b290cd40f4069249c879efcbae6a6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008 Joerg Sonnenberger
5  * All rights reserved.
6  */
7 
8 #include "bsdtar_platform.h"
9 
10 #if defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H) || defined(HAVE_PCRE2POSIX_H)
11 #include "bsdtar.h"
12 
13 #include <errno.h>
14 #if defined(HAVE_PCREPOSIX_H)
15 #include <pcreposix.h>
16 #elif defined(HAVE_PCRE2POSIX_H)
17 #include <pcre2posix.h>
18 #else
19 #include <regex.h>
20 #endif
21 #include <stdlib.h>
22 #include <string.h>
23 
24 #ifndef REG_BASIC
25 #define	REG_BASIC 0
26 #endif
27 
28 #include "lafe_err.h"
29 
/*
 * One compiled substitution rule.  Rules are chained in a singly linked
 * list in the order they were added.
 */
struct subst_rule {
	struct subst_rule *next;	/* Next rule in the list, or NULL. */
	regex_t re;			/* Compiled match pattern. */
	char *result;			/* Replacement template (heap string). */
	unsigned int global:1;		/* Replace every match, not just the first. */
	unsigned int print:1;		/* Report the rewrite on stderr. */
	unsigned int regular:1;		/* Apply to regular filenames. */
	unsigned int symlink:1;		/* Apply to symlink targets. */
	unsigned int hardlink:1;	/* Apply to hardlink targets. */
	unsigned int from_begin:1;	/* Restart matching from the start of the name. */
};
36 
/* Head and tail of the list of substitution rules. */
struct substitution {
	struct subst_rule *first_rule;	/* First rule added, or NULL if empty. */
	struct subst_rule *last_rule;	/* Most recently added rule, or NULL. */
};
40 
41 static void
init_substitution(struct bsdtar * bsdtar)42 init_substitution(struct bsdtar *bsdtar)
43 {
44 	struct substitution *subst;
45 
46 	bsdtar->substitution = subst = malloc(sizeof(*subst));
47 	if (subst == NULL)
48 		lafe_errc(1, errno, "Out of memory");
49 	subst->first_rule = subst->last_rule = NULL;
50 }
51 
52 void
add_substitution(struct bsdtar * bsdtar,const char * rule_text)53 add_substitution(struct bsdtar *bsdtar, const char *rule_text)
54 {
55 	struct subst_rule *rule;
56 	struct substitution *subst;
57 	const char *end_pattern, *start_subst;
58 	char *pattern;
59 	int r;
60 
61 	if ((subst = bsdtar->substitution) == NULL) {
62 		init_substitution(bsdtar);
63 		subst = bsdtar->substitution;
64 	}
65 
66 	rule = malloc(sizeof(*rule));
67 	if (rule == NULL)
68 		lafe_errc(1, errno, "Out of memory");
69 	rule->next = NULL;
70 	rule->result = NULL;
71 
72 	if (subst->last_rule == NULL)
73 		subst->first_rule = rule;
74 	else
75 		subst->last_rule->next = rule;
76 	subst->last_rule = rule;
77 
78 	if (*rule_text == '\0')
79 		lafe_errc(1, 0, "Empty replacement string");
80 	end_pattern = strchr(rule_text + 1, *rule_text);
81 	if (end_pattern == NULL)
82 		lafe_errc(1, 0, "Invalid replacement string");
83 
84 	pattern = malloc(end_pattern - rule_text);
85 	if (pattern == NULL)
86 		lafe_errc(1, errno, "Out of memory");
87 	memcpy(pattern, rule_text + 1, end_pattern - rule_text - 1);
88 	pattern[end_pattern - rule_text - 1] = '\0';
89 
90 	if ((r = regcomp(&rule->re, pattern, REG_BASIC)) != 0) {
91 		char buf[80];
92 		regerror(r, &rule->re, buf, sizeof(buf));
93 		lafe_errc(1, 0, "Invalid regular expression: %s", buf);
94 	}
95 	free(pattern);
96 
97 	start_subst = end_pattern + 1;
98 	end_pattern = strchr(start_subst, *rule_text);
99 	if (end_pattern == NULL)
100 		lafe_errc(1, 0, "Invalid replacement string");
101 
102 	rule->result = malloc(end_pattern - start_subst + 1);
103 	if (rule->result == NULL)
104 		lafe_errc(1, errno, "Out of memory");
105 	memcpy(rule->result, start_subst, end_pattern - start_subst);
106 	rule->result[end_pattern - start_subst] = '\0';
107 
108 	/* Defaults */
109 	rule->global = 0; /* Don't do multiple replacements. */
110 	rule->print = 0; /* Don't print. */
111 	rule->regular = 1; /* Rewrite regular filenames. */
112 	rule->symlink = 1; /* Rewrite symlink targets. */
113 	rule->hardlink = 1; /* Rewrite hardlink targets. */
114 	rule->from_begin = 0; /* Don't match from start. */
115 
116 	while (*++end_pattern) {
117 		switch (*end_pattern) {
118 		case 'b':
119 		case 'B':
120 			rule->from_begin = 1;
121 			break;
122 		case 'g':
123 		case 'G':
124 			rule->global = 1;
125 			break;
126 		case 'h':
127 			rule->hardlink = 1;
128 			break;
129 		case 'H':
130 			rule->hardlink = 0;
131 			break;
132 		case 'p':
133 		case 'P':
134 			rule->print = 1;
135 			break;
136 		case 'r':
137 			rule->regular = 1;
138 			break;
139 		case 'R':
140 			rule->regular = 0;
141 			break;
142 		case 's':
143 			rule->symlink = 1;
144 			break;
145 		case 'S':
146 			rule->symlink = 0;
147 			break;
148 		default:
149 			lafe_errc(1, 0, "Invalid replacement flag %c", *end_pattern);
150 			/* NOTREACHED */
151 		}
152 	}
153 }
154 
/*
 * Replace *str with a freshly allocated string holding the old contents
 * of *str (treated as empty when *str is NULL) followed by the first len
 * bytes of append and a terminating NUL.  The previous *str is freed.
 * An allocation failure is reported through lafe_errc(1, ...).
 */
static void
realloc_strncat(char **str, const char *append, size_t len)
{
	char *previous = *str;
	size_t prefix_len = (previous != NULL) ? strlen(previous) : 0;
	char *joined = malloc(prefix_len + len + 1);

	if (joined == NULL)
		lafe_errc(1, errno, "Out of memory");
	if (previous != NULL)
		memcpy(joined, previous, prefix_len);
	memcpy(joined + prefix_len, append, len);
	joined[prefix_len + len] = '\0';

	free(previous);
	*str = joined;
}
176 
/*
 * Replace *str with a freshly allocated string holding the old contents
 * of *str (treated as empty when *str is NULL) followed by the whole
 * NUL-terminated string append.  The previous *str is freed.
 * An allocation failure is reported through lafe_errc(1, ...).
 */
static void
realloc_strcat(char **str, const char *append)
{
	char *previous = *str;
	size_t prefix_len = (previous != NULL) ? strlen(previous) : 0;
	size_t suffix_len = strlen(append);
	char *joined = malloc(prefix_len + suffix_len + 1);

	if (joined == NULL)
		lafe_errc(1, errno, "Out of memory");
	if (previous != NULL)
		memcpy(joined, previous, prefix_len);
	/* The +1 carries append's terminating NUL along. */
	memcpy(joined + prefix_len, append, suffix_len + 1);

	free(previous);
	*str = joined;
}
197 
198 int
apply_substitution(struct bsdtar * bsdtar,const char * name,char ** result,int symlink_target,int hardlink_target)199 apply_substitution(struct bsdtar *bsdtar, const char *name, char **result,
200     int symlink_target, int hardlink_target)
201 {
202 	const char *path = name;
203 	regmatch_t matches[10];
204 	char* buffer = NULL;
205 	size_t i, j;
206 	struct subst_rule *rule;
207 	struct substitution *subst;
208 	int c, got_match, print_match;
209 
210 	*result = NULL;
211 
212 	if ((subst = bsdtar->substitution) == NULL)
213 		return 0;
214 
215 	got_match = 0;
216 	print_match = 0;
217 
218 	for (rule = subst->first_rule; rule != NULL; rule = rule->next) {
219 		if (symlink_target) {
220 			if (!rule->symlink)
221 				continue;
222 		} else if (hardlink_target) {
223 			if (!rule->hardlink)
224 				continue;
225 		} else { /* Regular filename. */
226 			if (!rule->regular)
227 				continue;
228 		}
229 
230 		if (rule->from_begin && *result) {
231 			realloc_strcat(result, name);
232 			if (buffer) buffer[0] = 0;
233 			realloc_strcat(&buffer, *result);
234 			name = buffer;
235 			(*result)[0] = 0;
236 		}
237 
238 		char isEnd = 0;
239 		do {
240             isEnd = *name == '\0';
241 			if (regexec(&rule->re, name, 10, matches, 0))
242 				break;
243 
244 			got_match = 1;
245 			print_match |= rule->print;
246 			realloc_strncat(result, name, matches[0].rm_so);
247 
248 			for (i = 0, j = 0; rule->result[i] != '\0'; ++i) {
249 				if (rule->result[i] == '~') {
250 					realloc_strncat(result, rule->result + j, i - j);
251 					realloc_strncat(result,
252 					    name + matches[0].rm_so,
253 					    matches[0].rm_eo - matches[0].rm_so);
254 					j = i + 1;
255 					continue;
256 				}
257 				if (rule->result[i] != '\\')
258 					continue;
259 
260 				++i;
261 				c = rule->result[i];
262 				switch (c) {
263 				case '~':
264 				case '\\':
265 					realloc_strncat(result, rule->result + j, i - j - 1);
266 					j = i;
267 					break;
268 				case '1':
269 				case '2':
270 				case '3':
271 				case '4':
272 				case '5':
273 				case '6':
274 				case '7':
275 				case '8':
276 				case '9':
277 					realloc_strncat(result, rule->result + j, i - j - 1);
278 					if ((size_t)(c - '0') > (size_t)(rule->re.re_nsub)) {
279 						free(buffer);
280 						free(*result);
281 						*result = NULL;
282 						return -1;
283 					}
284 					realloc_strncat(result, name + matches[c - '0'].rm_so, matches[c - '0'].rm_eo - matches[c - '0'].rm_so);
285 					j = i + 1;
286 					break;
287 				default:
288 					/* Just continue; */
289 					break;
290 				}
291 
292 			}
293 
294 			realloc_strcat(result, rule->result + j);
295 			if (matches[0].rm_eo > 0) {
296                 name += matches[0].rm_eo;
297             } else {
298                 // We skip a character because the match is 0-length
299                 // so we need to add it to the output
300                 realloc_strncat(result, name, 1);
301                 name += 1;
302             }
303 		} while (rule->global && !isEnd); // Testing one step after because sed et al. run 0-length patterns a last time on the empty string at the end
304 	}
305 
306 	if (got_match)
307 		realloc_strcat(result, name);
308 
309 	free(buffer);
310 
311 	if (print_match)
312 		fprintf(stderr, "%s >> %s\n", path, *result);
313 
314 	return got_match;
315 }
316 
317 void
cleanup_substitution(struct bsdtar * bsdtar)318 cleanup_substitution(struct bsdtar *bsdtar)
319 {
320 	struct subst_rule *rule;
321 	struct substitution *subst;
322 
323 	if ((subst = bsdtar->substitution) == NULL)
324 		return;
325 
326 	while ((rule = subst->first_rule) != NULL) {
327 		subst->first_rule = rule->next;
328 		free(rule->result);
329 		regfree(&rule->re);
330 		free(rule);
331 	}
332 	free(subst);
333 }
334 #endif /* defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H) || defined(HAVE_PCRE2POSIX_H) */
335