xref: /freebsd/contrib/libarchive/libarchive/archive_entry_link_resolver.c (revision 2e113ef82465598b8c26e0ca415fbe90677fbd47)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_SYS_STAT_H
29 #include <sys/stat.h>
30 #endif
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #include <stdio.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41 
42 #include "archive.h"
43 #include "archive_entry.h"
44 
45 /*
46  * This is mostly a pretty straightforward hash table implementation.
47  * The only interesting bit is the different strategies used to
48  * match up links.  These strategies match those used by various
49  * archiving formats:
50  *   tar - content stored with first link, remainder refer back to it.
51  *       This requires us to match each subsequent link up with the
52  *       first appearance.
53  *   cpio - Old cpio just stored body with each link, match-ups were
54  *       implicit.  This is trivial.
55  *   new cpio - New cpio only stores body with last link, match-ups
56  *       are implicit.  This is actually quite tricky; see the notes
57  *       below.
58  */
59 
60 /* Users pass us a format code, we translate that into a strategy here. */
61 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
65 
66 /* Initial size of link cache. */
67 #define	links_cache_initial_size 1024
68 
69 struct links_entry {
70 	struct links_entry	*next;
71 	struct links_entry	*previous;
72 	struct archive_entry	*canonical;
73 	struct archive_entry	*entry;
74 	size_t			 hash;
75 	unsigned int		 links; /* # links not yet seen */
76 };
77 
78 struct archive_entry_linkresolver {
79 	struct links_entry	**buckets;
80 	struct links_entry	 *spare;
81 	unsigned long		  number_entries;
82 	size_t			  number_buckets;
83 	int			  strategy;
84 };
85 
86 #define	NEXT_ENTRY_DEFERRED	1
87 #define	NEXT_ENTRY_PARTIAL	2
88 #define	NEXT_ENTRY_ALL		(NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
89 
90 static struct links_entry *find_entry(struct archive_entry_linkresolver *,
91 		    struct archive_entry *);
92 static void grow_hash(struct archive_entry_linkresolver *);
93 static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
94 		    struct archive_entry *);
95 static struct links_entry *next_entry(struct archive_entry_linkresolver *,
96     int);
97 
98 struct archive_entry_linkresolver *
archive_entry_linkresolver_new(void)99 archive_entry_linkresolver_new(void)
100 {
101 	struct archive_entry_linkresolver *res;
102 
103 	/* Check for positive power-of-two */
104 	if (links_cache_initial_size == 0 ||
105 	    (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
106 		return (NULL);
107 
108 	res = calloc(1, sizeof(struct archive_entry_linkresolver));
109 	if (res == NULL)
110 		return (NULL);
111 	res->number_buckets = links_cache_initial_size;
112 	res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
113 	if (res->buckets == NULL) {
114 		free(res);
115 		return (NULL);
116 	}
117 	return (res);
118 }
119 
120 void
archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver * res,int fmt)121 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
122     int fmt)
123 {
124 	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
125 
126 	switch (fmtbase) {
127 	case ARCHIVE_FORMAT_7ZIP:
128 	case ARCHIVE_FORMAT_AR:
129 	case ARCHIVE_FORMAT_ZIP:
130 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131 		break;
132 	case ARCHIVE_FORMAT_CPIO:
133 		switch (fmt) {
134 		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
135 		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
136 			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
137 			break;
138 		default:
139 			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
140 			break;
141 		}
142 		break;
143 	case ARCHIVE_FORMAT_MTREE:
144 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
145 		break;
146 	case ARCHIVE_FORMAT_ISO9660:
147 	case ARCHIVE_FORMAT_SHAR:
148 	case ARCHIVE_FORMAT_TAR:
149 	case ARCHIVE_FORMAT_XAR:
150 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
151 		break;
152 	default:
153 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
154 		break;
155 	}
156 }
157 
158 void
archive_entry_linkresolver_free(struct archive_entry_linkresolver * res)159 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
160 {
161 	struct links_entry *le;
162 
163 	if (res == NULL)
164 		return;
165 
166 	while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
167 		archive_entry_free(le->entry);
168 	free(res->buckets);
169 	free(res);
170 }
171 
172 void
archive_entry_linkify(struct archive_entry_linkresolver * res,struct archive_entry ** e,struct archive_entry ** f)173 archive_entry_linkify(struct archive_entry_linkresolver *res,
174     struct archive_entry **e, struct archive_entry **f)
175 {
176 	struct links_entry *le;
177 	struct archive_entry *t;
178 
179 	*f = NULL; /* Default: Don't return a second entry. */
180 
181 	if (*e == NULL) {
182 		le = next_entry(res, NEXT_ENTRY_DEFERRED);
183 		if (le != NULL) {
184 			*e = le->entry;
185 			le->entry = NULL;
186 		}
187 		return;
188 	}
189 
190 	/* If it has only one link, then we're done. */
191 	if (archive_entry_nlink(*e) == 1)
192 		return;
193 	/* Directories, devices never have hardlinks. */
194 	if (archive_entry_filetype(*e) == AE_IFDIR
195 	    || archive_entry_filetype(*e) == AE_IFBLK
196 	    || archive_entry_filetype(*e) == AE_IFCHR)
197 		return;
198 
199 	switch (res->strategy) {
200 	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
201 		le = find_entry(res, *e);
202 		if (le != NULL) {
203 			archive_entry_unset_size(*e);
204 #if defined(_WIN32) && !defined(__CYGWIN__)
205 			archive_entry_copy_hardlink_w(*e,
206 			    archive_entry_pathname_w(le->canonical));
207 #else
208 			archive_entry_copy_hardlink(*e,
209 			    archive_entry_pathname(le->canonical));
210 #endif
211 		} else
212 			insert_entry(res, *e);
213 		return;
214 	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
215 		le = find_entry(res, *e);
216 		if (le != NULL) {
217 #if defined(_WIN32) && !defined(__CYGWIN__)
218 			archive_entry_copy_hardlink_w(*e,
219 			    archive_entry_pathname_w(le->canonical));
220 #else
221 			archive_entry_copy_hardlink(*e,
222 			    archive_entry_pathname(le->canonical));
223 #endif
224 		} else
225 			insert_entry(res, *e);
226 		return;
227 	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
228 		/* This one is trivial. */
229 		return;
230 	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
231 		le = find_entry(res, *e);
232 		if (le != NULL) {
233 			/*
234 			 * Put the new entry in le, return the
235 			 * old entry from le.
236 			 */
237 			t = *e;
238 			*e = le->entry;
239 			le->entry = t;
240 			/* Make the old entry into a hardlink. */
241 			archive_entry_unset_size(*e);
242 #if defined(_WIN32) && !defined(__CYGWIN__)
243 			archive_entry_copy_hardlink_w(*e,
244 			    archive_entry_pathname_w(le->canonical));
245 #else
246 			archive_entry_copy_hardlink(*e,
247 			    archive_entry_pathname(le->canonical));
248 #endif
249 			/* If we ran out of links, return the
250 			 * final entry as well. */
251 			if (le->links == 0) {
252 				*f = le->entry;
253 				le->entry = NULL;
254 			}
255 		} else {
256 			/*
257 			 * If we haven't seen it, tuck it away
258 			 * for future use.
259 			 */
260 			le = insert_entry(res, *e);
261 			if (le == NULL)
262 				/* XXX We should return an error code XXX */
263 				return;
264 			le->entry = *e;
265 			*e = NULL;
266 		}
267 		return;
268 	default:
269 		break;
270 	}
271 	return;
272 }
273 
274 static struct links_entry *
find_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)275 find_entry(struct archive_entry_linkresolver *res,
276     struct archive_entry *entry)
277 {
278 	struct links_entry	*le;
279 	size_t			 hash, bucket;
280 	dev_t			 dev;
281 	int64_t			 ino;
282 
283 	if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
284 		return (NULL);
285 	}
286 
287 	/* Free a held entry. */
288 	if (res->spare != NULL) {
289 		archive_entry_free(res->spare->canonical);
290 		archive_entry_free(res->spare->entry);
291 		free(res->spare);
292 		res->spare = NULL;
293 	}
294 
295 	dev = archive_entry_dev(entry);
296 	ino = archive_entry_ino64(entry);
297 	hash = (size_t)(dev ^ ino);
298 
299 	/* Try to locate this entry in the links cache. */
300 	bucket = hash & (res->number_buckets - 1);
301 	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
302 		if (le->hash == hash
303 		    && dev == archive_entry_dev(le->canonical)
304 		    && ino == archive_entry_ino64(le->canonical)) {
305 			/*
306 			 * Decrement link count each time and release
307 			 * the entry if it hits zero.  This saves
308 			 * memory and is necessary for detecting
309 			 * missed links.
310 			 */
311 			--le->links;
312 			if (le->links > 0)
313 				return (le);
314 			/* Remove it from this hash bucket. */
315 			if (le->previous != NULL)
316 				le->previous->next = le->next;
317 			if (le->next != NULL)
318 				le->next->previous = le->previous;
319 			if (res->buckets[bucket] == le)
320 				res->buckets[bucket] = le->next;
321 			res->number_entries--;
322 			/* Defer freeing this entry. */
323 			res->spare = le;
324 			return (le);
325 		}
326 	}
327 	return (NULL);
328 }
329 
330 static struct links_entry *
next_entry(struct archive_entry_linkresolver * res,int mode)331 next_entry(struct archive_entry_linkresolver *res, int mode)
332 {
333 	struct links_entry	*le;
334 	size_t			 bucket;
335 
336 	/* Free a held entry. */
337 	if (res->spare != NULL) {
338 		archive_entry_free(res->spare->canonical);
339 		archive_entry_free(res->spare->entry);
340 		free(res->spare);
341 		res->spare = NULL;
342 	}
343 
344 	/* Look for next non-empty bucket in the links cache. */
345 	for (bucket = 0; bucket < res->number_buckets; bucket++) {
346 		for (le = res->buckets[bucket]; le != NULL; le = le->next) {
347 			if (le->entry != NULL &&
348 			    (mode & NEXT_ENTRY_DEFERRED) == 0)
349 				continue;
350 			if (le->entry == NULL &&
351 			    (mode & NEXT_ENTRY_PARTIAL) == 0)
352 				continue;
353 			/* Remove it from this hash bucket. */
354 			if (le->next != NULL)
355 				le->next->previous = le->previous;
356 			if (le->previous != NULL)
357 				le->previous->next = le->next;
358 			else
359 				res->buckets[bucket] = le->next;
360 			res->number_entries--;
361 			/* Defer freeing this entry. */
362 			res->spare = le;
363 			return (le);
364 		}
365 	}
366 	return (NULL);
367 }
368 
369 static struct links_entry *
insert_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)370 insert_entry(struct archive_entry_linkresolver *res,
371     struct archive_entry *entry)
372 {
373 	struct links_entry *le;
374 	size_t hash, bucket;
375 
376 	if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
377 		return (NULL);
378 	}
379 
380 	/* Add this entry to the links cache. */
381 	le = calloc(1, sizeof(struct links_entry));
382 	if (le == NULL)
383 		return (NULL);
384 	le->canonical = archive_entry_clone(entry);
385 
386 	/* If the links cache is getting too full, enlarge the hash table. */
387 	if (res->number_entries > res->number_buckets * 2)
388 		grow_hash(res);
389 
390 	hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
391 	bucket = hash & (res->number_buckets - 1);
392 
393 	/* If we could allocate the entry, record it. */
394 	if (res->buckets[bucket] != NULL)
395 		res->buckets[bucket]->previous = le;
396 	res->number_entries++;
397 	le->next = res->buckets[bucket];
398 	le->previous = NULL;
399 	res->buckets[bucket] = le;
400 	le->hash = hash;
401 	le->links = archive_entry_nlink(entry) - 1;
402 	return (le);
403 }
404 
405 static void
grow_hash(struct archive_entry_linkresolver * res)406 grow_hash(struct archive_entry_linkresolver *res)
407 {
408 	struct links_entry *le, **new_buckets;
409 	size_t new_size;
410 	size_t i, bucket;
411 
412 	/* Try to enlarge the bucket list. */
413 	new_size = res->number_buckets * 2;
414 	if (new_size < res->number_buckets)
415 		return;
416 	new_buckets = calloc(new_size, sizeof(struct links_entry *));
417 
418 	if (new_buckets == NULL)
419 		return;
420 
421 	for (i = 0; i < res->number_buckets; i++) {
422 		while (res->buckets[i] != NULL) {
423 			/* Remove entry from old bucket. */
424 			le = res->buckets[i];
425 			res->buckets[i] = le->next;
426 
427 			/* Add entry to new bucket. */
428 			bucket = le->hash & (new_size - 1);
429 
430 			if (new_buckets[bucket] != NULL)
431 				new_buckets[bucket]->previous = le;
432 			le->next = new_buckets[bucket];
433 			le->previous = NULL;
434 			new_buckets[bucket] = le;
435 		}
436 	}
437 	free(res->buckets);
438 	res->buckets = new_buckets;
439 	res->number_buckets = new_size;
440 }
441 
442 struct archive_entry *
archive_entry_partial_links(struct archive_entry_linkresolver * res,unsigned int * links)443 archive_entry_partial_links(struct archive_entry_linkresolver *res,
444     unsigned int *links)
445 {
446 	struct archive_entry	*e;
447 	struct links_entry	*le;
448 
449 	/* Free a held entry. */
450 	if (res->spare != NULL) {
451 		archive_entry_free(res->spare->canonical);
452 		archive_entry_free(res->spare->entry);
453 		free(res->spare);
454 		res->spare = NULL;
455 	}
456 
457 	le = next_entry(res, NEXT_ENTRY_PARTIAL);
458 	if (le != NULL) {
459 		e = le->canonical;
460 		if (links != NULL)
461 			*links = le->links;
462 		le->canonical = NULL;
463 	} else {
464 		e = NULL;
465 		if (links != NULL)
466 			*links = 0;
467 	}
468 	return (e);
469 }
470