1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_SYS_STAT_H
29 #include <sys/stat.h>
30 #endif
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #include <stdio.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41
42 #include "archive.h"
43 #include "archive_entry.h"
44
45 /*
46 * This is mostly a pretty straightforward hash table implementation.
47 * The only interesting bit is the different strategies used to
48 * match up links. These strategies match those used by various
49 * archiving formats:
50 * tar - content stored with first link, remainder refer back to it.
51 * This requires us to match each subsequent link up with the
52 * first appearance.
53 * cpio - Old cpio just stored body with each link, match-ups were
54 * implicit. This is trivial.
55 * new cpio - New cpio only stores body with last link, match-ups
56 * are implicit. This is actually quite tricky; see the notes
57 * below.
58 */
59
/*
 * Link-matching strategies.  Callers hand us an archive format code;
 * archive_entry_linkresolver_set_strategy() translates it into one of
 * the values below and stores it in the resolver.
 */
enum {
	ARCHIVE_ENTRY_LINKIFY_LIKE_TAR = 0,
	ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE = 1,
	ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO = 2,
	ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO = 3
};
65
/*
 * Number of hash buckets a freshly created resolver starts with.
 * This must be a positive power of two, because a bucket is selected
 * by masking the hash with (number_buckets - 1).
 */
enum { links_cache_initial_size = 1024 };
68
/*
 * One record in the links cache.  Records that share a bucket form a
 * doubly-linked chain; the head of a chain has previous == NULL.
 */
struct links_entry {
	/* Neighbours in the bucket chain. */
	struct links_entry	*next, *previous;
	/*
	 * canonical: private clone of the entry that created this record.
	 * entry:     entry parked here by the new-cpio strategy, else NULL.
	 */
	struct archive_entry	*canonical, *entry;
	/* dev ^ ino of the canonical entry, cached for lookup and rehash. */
	size_t			 hash;
	/* Number of links not yet seen. */
	unsigned int		 links;
};
77
/*
 * The resolver itself: a chained hash table of links_entry records
 * plus the strategy that decides how matches are reported.
 */
struct archive_entry_linkresolver {
	/* Array of number_buckets chain heads. */
	struct links_entry	**buckets;
	/*
	 * Record already unlinked from the table whose release is
	 * postponed until the next lookup/iteration call.
	 */
	struct links_entry	 *spare;
	/* Count of records currently in the table. */
	unsigned long		  number_entries;
	/* Length of buckets[]; kept a power of two. */
	size_t			  number_buckets;
	/* One of the ARCHIVE_ENTRY_LINKIFY_LIKE_* values. */
	int			  strategy;
};
85
/*
 * Selection flags for next_entry():
 *   DEFERRED - records that still hold a parked entry (entry != NULL)
 *   PARTIAL  - records that hold no parked entry (entry == NULL)
 *   ALL      - either kind
 */
enum {
	NEXT_ENTRY_DEFERRED = 1,
	NEXT_ENTRY_PARTIAL = 2,
	NEXT_ENTRY_ALL = NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL
};
89
/* File-local helpers; definitions follow the public entry points. */
static struct links_entry *find_entry(struct archive_entry_linkresolver *res,
		    struct archive_entry *entry);
static void grow_hash(struct archive_entry_linkresolver *res);
static struct links_entry *insert_entry(struct archive_entry_linkresolver *res,
		    struct archive_entry *entry);
static struct links_entry *next_entry(struct archive_entry_linkresolver *res,
		    int mode);
97
98 struct archive_entry_linkresolver *
archive_entry_linkresolver_new(void)99 archive_entry_linkresolver_new(void)
100 {
101 struct archive_entry_linkresolver *res;
102
103 /* Check for positive power-of-two */
104 if (links_cache_initial_size == 0 ||
105 (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
106 return (NULL);
107
108 res = calloc(1, sizeof(struct archive_entry_linkresolver));
109 if (res == NULL)
110 return (NULL);
111 res->number_buckets = links_cache_initial_size;
112 res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
113 if (res->buckets == NULL) {
114 free(res);
115 return (NULL);
116 }
117 return (res);
118 }
119
120 void
archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver * res,int fmt)121 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
122 int fmt)
123 {
124 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
125
126 switch (fmtbase) {
127 case ARCHIVE_FORMAT_7ZIP:
128 case ARCHIVE_FORMAT_AR:
129 case ARCHIVE_FORMAT_ZIP:
130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131 break;
132 case ARCHIVE_FORMAT_CPIO:
133 switch (fmt) {
134 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
135 case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
136 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
137 break;
138 default:
139 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
140 break;
141 }
142 break;
143 case ARCHIVE_FORMAT_MTREE:
144 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
145 break;
146 case ARCHIVE_FORMAT_ISO9660:
147 case ARCHIVE_FORMAT_SHAR:
148 case ARCHIVE_FORMAT_TAR:
149 case ARCHIVE_FORMAT_XAR:
150 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
151 break;
152 default:
153 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
154 break;
155 }
156 }
157
158 void
archive_entry_linkresolver_free(struct archive_entry_linkresolver * res)159 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
160 {
161 struct links_entry *le;
162
163 if (res == NULL)
164 return;
165
166 while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
167 archive_entry_free(le->entry);
168 free(res->buckets);
169 free(res);
170 }
171
172 void
archive_entry_linkify(struct archive_entry_linkresolver * res,struct archive_entry ** e,struct archive_entry ** f)173 archive_entry_linkify(struct archive_entry_linkresolver *res,
174 struct archive_entry **e, struct archive_entry **f)
175 {
176 struct links_entry *le;
177 struct archive_entry *t;
178
179 *f = NULL; /* Default: Don't return a second entry. */
180
181 if (*e == NULL) {
182 le = next_entry(res, NEXT_ENTRY_DEFERRED);
183 if (le != NULL) {
184 *e = le->entry;
185 le->entry = NULL;
186 }
187 return;
188 }
189
190 /* If it has only one link, then we're done. */
191 if (archive_entry_nlink(*e) == 1)
192 return;
193 /* Directories, devices never have hardlinks. */
194 if (archive_entry_filetype(*e) == AE_IFDIR
195 || archive_entry_filetype(*e) == AE_IFBLK
196 || archive_entry_filetype(*e) == AE_IFCHR)
197 return;
198
199 switch (res->strategy) {
200 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
201 le = find_entry(res, *e);
202 if (le != NULL) {
203 archive_entry_unset_size(*e);
204 #if defined(_WIN32) && !defined(__CYGWIN__)
205 archive_entry_copy_hardlink_w(*e,
206 archive_entry_pathname_w(le->canonical));
207 #else
208 archive_entry_copy_hardlink(*e,
209 archive_entry_pathname(le->canonical));
210 #endif
211 } else
212 insert_entry(res, *e);
213 return;
214 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
215 le = find_entry(res, *e);
216 if (le != NULL) {
217 #if defined(_WIN32) && !defined(__CYGWIN__)
218 archive_entry_copy_hardlink_w(*e,
219 archive_entry_pathname_w(le->canonical));
220 #else
221 archive_entry_copy_hardlink(*e,
222 archive_entry_pathname(le->canonical));
223 #endif
224 } else
225 insert_entry(res, *e);
226 return;
227 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
228 /* This one is trivial. */
229 return;
230 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
231 le = find_entry(res, *e);
232 if (le != NULL) {
233 /*
234 * Put the new entry in le, return the
235 * old entry from le.
236 */
237 t = *e;
238 *e = le->entry;
239 le->entry = t;
240 /* Make the old entry into a hardlink. */
241 archive_entry_unset_size(*e);
242 #if defined(_WIN32) && !defined(__CYGWIN__)
243 archive_entry_copy_hardlink_w(*e,
244 archive_entry_pathname_w(le->canonical));
245 #else
246 archive_entry_copy_hardlink(*e,
247 archive_entry_pathname(le->canonical));
248 #endif
249 /* If we ran out of links, return the
250 * final entry as well. */
251 if (le->links == 0) {
252 *f = le->entry;
253 le->entry = NULL;
254 }
255 } else {
256 /*
257 * If we haven't seen it, tuck it away
258 * for future use.
259 */
260 le = insert_entry(res, *e);
261 if (le == NULL)
262 /* XXX We should return an error code XXX */
263 return;
264 le->entry = *e;
265 *e = NULL;
266 }
267 return;
268 default:
269 break;
270 }
271 return;
272 }
273
274 static struct links_entry *
find_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)275 find_entry(struct archive_entry_linkresolver *res,
276 struct archive_entry *entry)
277 {
278 struct links_entry *le;
279 size_t hash, bucket;
280 dev_t dev;
281 int64_t ino;
282
283 if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
284 return (NULL);
285 }
286
287 /* Free a held entry. */
288 if (res->spare != NULL) {
289 archive_entry_free(res->spare->canonical);
290 archive_entry_free(res->spare->entry);
291 free(res->spare);
292 res->spare = NULL;
293 }
294
295 dev = archive_entry_dev(entry);
296 ino = archive_entry_ino64(entry);
297 hash = (size_t)(dev ^ ino);
298
299 /* Try to locate this entry in the links cache. */
300 bucket = hash & (res->number_buckets - 1);
301 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
302 if (le->hash == hash
303 && dev == archive_entry_dev(le->canonical)
304 && ino == archive_entry_ino64(le->canonical)) {
305 /*
306 * Decrement link count each time and release
307 * the entry if it hits zero. This saves
308 * memory and is necessary for detecting
309 * missed links.
310 */
311 --le->links;
312 if (le->links > 0)
313 return (le);
314 /* Remove it from this hash bucket. */
315 if (le->previous != NULL)
316 le->previous->next = le->next;
317 if (le->next != NULL)
318 le->next->previous = le->previous;
319 if (res->buckets[bucket] == le)
320 res->buckets[bucket] = le->next;
321 res->number_entries--;
322 /* Defer freeing this entry. */
323 res->spare = le;
324 return (le);
325 }
326 }
327 return (NULL);
328 }
329
330 static struct links_entry *
next_entry(struct archive_entry_linkresolver * res,int mode)331 next_entry(struct archive_entry_linkresolver *res, int mode)
332 {
333 struct links_entry *le;
334 size_t bucket;
335
336 /* Free a held entry. */
337 if (res->spare != NULL) {
338 archive_entry_free(res->spare->canonical);
339 archive_entry_free(res->spare->entry);
340 free(res->spare);
341 res->spare = NULL;
342 }
343
344 /* Look for next non-empty bucket in the links cache. */
345 for (bucket = 0; bucket < res->number_buckets; bucket++) {
346 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
347 if (le->entry != NULL &&
348 (mode & NEXT_ENTRY_DEFERRED) == 0)
349 continue;
350 if (le->entry == NULL &&
351 (mode & NEXT_ENTRY_PARTIAL) == 0)
352 continue;
353 /* Remove it from this hash bucket. */
354 if (le->next != NULL)
355 le->next->previous = le->previous;
356 if (le->previous != NULL)
357 le->previous->next = le->next;
358 else
359 res->buckets[bucket] = le->next;
360 res->number_entries--;
361 /* Defer freeing this entry. */
362 res->spare = le;
363 return (le);
364 }
365 }
366 return (NULL);
367 }
368
369 static struct links_entry *
insert_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)370 insert_entry(struct archive_entry_linkresolver *res,
371 struct archive_entry *entry)
372 {
373 struct links_entry *le;
374 size_t hash, bucket;
375
376 if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
377 return (NULL);
378 }
379
380 /* Add this entry to the links cache. */
381 le = calloc(1, sizeof(struct links_entry));
382 if (le == NULL)
383 return (NULL);
384 le->canonical = archive_entry_clone(entry);
385
386 /* If the links cache is getting too full, enlarge the hash table. */
387 if (res->number_entries > res->number_buckets * 2)
388 grow_hash(res);
389
390 hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
391 bucket = hash & (res->number_buckets - 1);
392
393 /* If we could allocate the entry, record it. */
394 if (res->buckets[bucket] != NULL)
395 res->buckets[bucket]->previous = le;
396 res->number_entries++;
397 le->next = res->buckets[bucket];
398 le->previous = NULL;
399 res->buckets[bucket] = le;
400 le->hash = hash;
401 le->links = archive_entry_nlink(entry) - 1;
402 return (le);
403 }
404
405 static void
grow_hash(struct archive_entry_linkresolver * res)406 grow_hash(struct archive_entry_linkresolver *res)
407 {
408 struct links_entry *le, **new_buckets;
409 size_t new_size;
410 size_t i, bucket;
411
412 /* Try to enlarge the bucket list. */
413 new_size = res->number_buckets * 2;
414 if (new_size < res->number_buckets)
415 return;
416 new_buckets = calloc(new_size, sizeof(struct links_entry *));
417
418 if (new_buckets == NULL)
419 return;
420
421 for (i = 0; i < res->number_buckets; i++) {
422 while (res->buckets[i] != NULL) {
423 /* Remove entry from old bucket. */
424 le = res->buckets[i];
425 res->buckets[i] = le->next;
426
427 /* Add entry to new bucket. */
428 bucket = le->hash & (new_size - 1);
429
430 if (new_buckets[bucket] != NULL)
431 new_buckets[bucket]->previous = le;
432 le->next = new_buckets[bucket];
433 le->previous = NULL;
434 new_buckets[bucket] = le;
435 }
436 }
437 free(res->buckets);
438 res->buckets = new_buckets;
439 res->number_buckets = new_size;
440 }
441
442 struct archive_entry *
archive_entry_partial_links(struct archive_entry_linkresolver * res,unsigned int * links)443 archive_entry_partial_links(struct archive_entry_linkresolver *res,
444 unsigned int *links)
445 {
446 struct archive_entry *e;
447 struct links_entry *le;
448
449 /* Free a held entry. */
450 if (res->spare != NULL) {
451 archive_entry_free(res->spare->canonical);
452 archive_entry_free(res->spare->entry);
453 free(res->spare);
454 res->spare = NULL;
455 }
456
457 le = next_entry(res, NEXT_ENTRY_PARTIAL);
458 if (le != NULL) {
459 e = le->canonical;
460 if (links != NULL)
461 *links = le->links;
462 le->canonical = NULL;
463 } else {
464 e = NULL;
465 if (links != NULL)
466 *links = 0;
467 }
468 return (e);
469 }
470