1 /*-
2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27 #include "archive_endian.h"
28
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <time.h>
33 #ifdef HAVE_ZLIB_H
34 #include <zlib.h> /* crc32 */
35 #endif
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #endif
39
40 #include "archive.h"
41 #ifndef HAVE_ZLIB_H
42 #include "archive_crc32.h"
43 #endif
44
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_ppmd7_private.h"
48 #include "archive_entry_private.h"
49 #include "archive_time_private.h"
50
51 #ifdef HAVE_BLAKE2_H
52 #include <blake2.h>
53 #else
54 #include "archive_blake2.h"
55 #endif
56
57 /*#define CHECK_CRC_ON_SOLID_SKIP*/
58 /*#define DONT_FAIL_ON_CRC_ERROR*/
59 /*#define DEBUG*/
60
/* Smaller / larger of two values. The arguments can be evaluated more than
 * once, so never pass expressions that have side effects. */
#define rar5_min(a, b) (((a) > (b)) ? (b) : (a))
#define rar5_max(a, b) (((a) > (b)) ? (a) : (b))

/* Number of elements of the array X, as a signed value. Works on real
 * arrays only (not on pointers). The argument is fully parenthesized so that
 * any array-valued expression can be passed safely. */
#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*(X))))

/* DEBUG_CODE guards a statement or block that should only run in debug
 * builds. LOG() prints one line prefixed with "rar5: " and is available only
 * when DEBUG is defined. */
#if defined DEBUG
#define DEBUG_CODE if(1)
#define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0)
#else
#define DEBUG_CODE if(0)
#endif
71
72 /* Real RAR5 magic number is:
73 *
74 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00
75 * "Rar!→•☺·\x00"
76 *
77 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't
78 * want to put this magic sequence in each binary that uses libarchive, so
79 * applications that scan through the file for this marker won't trigger on
80 * this "false" one.
81 *
82 * The array itself is decrypted in `rar5_init` function. */
83
/* RAR5 signature with every byte XOR'ed with 0xA1. */
static unsigned char rar5_signature_xor[] = {
	0xF3, 0xC0, 0xD3, 0x80, 0xBB, 0xA6, 0xA0, 0xA1
};
static const size_t g_unpack_window_size = 0x20000;

/* Kept as preprocessor constants instead of static const's because of
 * Visual Studio. */
#define MAX_NAME_IN_CHARS 2048
#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS)
91
92 struct file_header {
93 ssize_t bytes_remaining;
94 ssize_t unpacked_size;
95 int64_t last_offset; /* Used in sanity checks. */
96 int64_t last_size; /* Used in sanity checks. */
97
98 uint8_t solid : 1; /* Is this a solid stream? */
99 uint8_t service : 1; /* Is this file a service data? */
100 uint8_t eof : 1; /* Did we finish unpacking the file? */
101 uint8_t dir : 1; /* Is this file entry a directory? */
102
103 /* Optional time fields. */
104 int64_t e_mtime;
105 int64_t e_ctime;
106 int64_t e_atime;
107 uint32_t e_mtime_ns;
108 uint32_t e_ctime_ns;
109 uint32_t e_atime_ns;
110
111 /* Optional hash fields. */
112 uint32_t stored_crc32;
113 uint32_t calculated_crc32;
114 uint8_t blake2sp[32];
115 blake2sp_state b2state;
116 char has_blake2;
117
118 /* Optional redir fields */
119 uint64_t redir_type;
120 uint64_t redir_flags;
121
122 ssize_t solid_window_size; /* Used in file format check. */
123 };
124
/* EX_xxx identifiers; presumably the record types of a header's extra
 * area (confirm against the code that consumes them). */
enum EXTRA {
	EX_CRYPT   = 0x01,
	EX_HASH    = 0x02,
	EX_HTIME   = 0x03,
	EX_VERSION = 0x04,
	EX_REDIR   = 0x05,
	EX_UOWNER  = 0x06,
	EX_SUBDATA = 0x07
};
134
/* Redirection flag value. */
#define REDIR_SYMLINK_IS_DIR 1

/* Values stored in file_header.redir_type. */
enum REDIR_TYPE {
	REDIR_TYPE_NONE        = 0,
	REDIR_TYPE_UNIXSYMLINK = 1,
	REDIR_TYPE_WINSYMLINK  = 2,
	REDIR_TYPE_JUNCTION    = 3,
	REDIR_TYPE_HARDLINK    = 4,
	REDIR_TYPE_FILECOPY    = 5,
};
145
/* Owner flag bits (one bit per optional field) and the owner name length
 * limit. */
#define OWNER_USER_NAME		0x01
#define OWNER_GROUP_NAME	0x02
#define OWNER_USER_UID		0x04
#define OWNER_GROUP_GID		0x08
#define OWNER_MAXNAMELEN	256
151
/* Filter kinds that can be stored in filter_info.type. */
enum FILTER_TYPE {
	FILTER_DELTA   = 0,	/* Generic pattern. */
	FILTER_E8      = 1,	/* Intel x86 code. */
	FILTER_E8E9    = 2,	/* Intel x86 code. */
	FILTER_ARM     = 3,	/* ARM code. */
	FILTER_AUDIO   = 4,	/* Audio filter; not used in RARv5. */
	FILTER_RGB     = 5,	/* Color palette; not used in RARv5. */
	FILTER_ITANIUM = 6,	/* Intel's Itanium; not used in RARv5. */
	FILTER_PPM     = 7,	/* Predictive pattern matching; not used
				 * in RARv5. */
	FILTER_NONE    = 8,
};
164
/* Descriptor of one filter: its type plus the block range
 * (block_start, block_length) it applies to. */
struct filter_info {
	int type;		/* Filter type, see enum FILTER_TYPE. */
	int channels;		/* Channel count read by the delta filter. */
	int pos_r;

	int64_t block_start;
	ssize_t block_length;
	uint16_t width;
};
174
/* One pending output range: buffer pointer, byte count and offset, plus a
 * flag telling whether the slot is in use. */
struct data_ready {
	char used;
	const uint8_t *buf;
	size_t size;
	int64_t offset;
};
181
/* Circular double-ended queue. Elements are stored as size_t values in
 * `arr`; positions are wrapped with `cap_mask`. */
struct cdeque {
	uint16_t beg_pos;	/* Index of the front element. */
	uint16_t end_pos;	/* Index where the next element is stored. */
	uint16_t cap_mask;	/* Capacity - 1. */
	uint16_t size;		/* Number of stored elements. */
	size_t *arr;		/* Element storage. */
};
189
/* Huffman decoding table used by the decompressor. */
struct decode_table {
	uint32_t size;
	int32_t  decode_len[16];
	uint32_t decode_pos[16];
	uint32_t quick_bits;
	uint8_t  quick_len[1 << 10];
	uint16_t quick_num[1 << 10];
	uint16_t decode_num[306];
};
199
200 struct comp_state {
201 /* Flag used to specify if unpacker needs to reinitialize the
202 uncompression context. */
203 uint8_t initialized : 1;
204
205 /* Flag used when applying filters. */
206 uint8_t all_filters_applied : 1;
207
208 /* Flag used to skip file context reinitialization, used when unpacker
209 is skipping through different multivolume archives. */
210 uint8_t switch_multivolume : 1;
211
212 /* Flag used to specify if unpacker has processed the whole data block
213 or just a part of it. */
214 uint8_t block_parsing_finished : 1;
215
216 /* Flag used to indicate that a previous file using this buffer was
217 encrypted, meaning no data in the buffer can be trusted */
218 uint8_t data_encrypted : 1;
219
220 signed int notused : 3;
221
222 int flags; /* Uncompression flags. */
223 int method; /* Uncompression algorithm method. */
224 int version; /* Uncompression algorithm version. */
225 ssize_t window_size; /* Size of window_buf. */
226 uint8_t* window_buf; /* Circular buffer used during
227 decompression. */
228 uint8_t* filtered_buf; /* Buffer used when applying filters. */
229 const uint8_t* block_buf; /* Buffer used when merging blocks. */
230 ssize_t window_mask; /* Convenience field; window_size - 1. */
231 int64_t write_ptr; /* This amount of data has been unpacked
232 in the window buffer. */
233 int64_t last_write_ptr; /* This amount of data has been stored in
234 the output file. */
235 int64_t last_unstore_ptr; /* Counter of bytes extracted during
236 unstoring. This is separate from
237 last_write_ptr because of how SERVICE
238 base blocks are handled during skipping
239 in solid multiarchive archives. */
240 int64_t solid_offset; /* Additional offset inside the window
241 buffer, used in unpacking solid
242 archives. */
243 ssize_t cur_block_size; /* Size of current data block. */
244 int last_len; /* Flag used in lzss decompression. */
245
246 /* Decode tables used during lzss uncompression. */
247
248 #define HUFF_BC 20
249 struct decode_table bd; /* huffman bit lengths */
250 #define HUFF_NC 306
251 struct decode_table ld; /* literals */
252 #define HUFF_DC 64
253 struct decode_table dd; /* distances */
254 #define HUFF_LDC 16
255 struct decode_table ldd; /* lower bits of distances */
256 #define HUFF_RC 44
257 struct decode_table rd; /* repeating distances */
258 #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC)
259
260 /* Circular deque for storing filters. */
261 struct cdeque filters;
262 int64_t last_block_start; /* Used for sanity checking. */
263 ssize_t last_block_length; /* Used for sanity checking. */
264
265 /* Distance cache used during lzss uncompression. */
266 int dist_cache[4];
267
268 /* Data buffer stack. */
269 struct data_ready dready[2];
270 };
271
/* Position of the bit reader inside the current block. */
struct bit_reader {
	int8_t bit_addr;	/* Bit position inside the current byte. */
	int in_addr;		/* Byte position. */
};
277
/* Header of a RARv5 compressed block. Read the individual values of
 * block_flags_u8 through the bf_* helpers (bf_byte_count, etc). */
struct compressed_block_header {
	/* Little-endian bitfield made of:
	 *
	 * - table present flag	(shr 7, and 1),
	 * - last block flag	(shr 6, and 1),
	 * - byte_count		(shr 3, and 7),
	 * - bit_size		(shr 0, and 7).
	 */
	uint8_t block_flags_u8;
	uint8_t block_cksum;
};
291
/* Values taken from the RARv5 main header. */
struct main_header {
	/* Set when the archive contains solid streams. */
	uint8_t solid : 1;

	/* Set when this is a multi-file archive. */
	uint8_t volume : 1;
	uint8_t endarc : 1;
	uint8_t notused : 5;

	unsigned int vol_no;
};
304
/* Fields kept for the generic header: split flags, size and the id of the
 * last header seen. */
struct generic_header {
	uint8_t split_after : 1;
	uint8_t split_before : 1;
	uint8_t padding : 6;
	int size;
	int last_header_id;
};
312
/* Multivolume bookkeeping: the expected volume number and a push buffer. */
struct multivolume {
	unsigned int expected_vol_no;
	uint8_t *push_buf;
};
317
318 /* Main context structure. */
319 struct rar5 {
320 int header_initialized;
321
322 /* Set to 1 if current file is positioned AFTER the magic value
323 * of the archive file. This is used in header reading functions. */
324 int skipped_magic;
325
326 /* Set to not zero if we're in skip mode (either by calling
327 * rar5_data_skip function or when skipping over solid streams).
328 * Set to 0 when in * extraction mode. This is used during checksum
329 * calculation functions. */
330 int skip_mode;
331
332 /* Set to not zero if we're in block merging mode (i.e. when switching
333 * to another file in multivolume archive, last block from 1st archive
334 * needs to be merged with 1st block from 2nd archive). This flag
335 * guards against recursive use of the merging function, which doesn't
336 * support recursive calls. */
337 int merge_mode;
338
339 /* An offset to QuickOpen list. This is not supported by this unpacker,
340 * because we're focusing on streaming interface. QuickOpen is designed
341 * to make things quicker for non-stream interfaces, so it's not our
342 * use case. */
343 uint64_t qlist_offset;
344
345 /* An offset to additional Recovery data. This is not supported by this
346 * unpacker. Recovery data are additional Reed-Solomon codes that could
347 * be used to calculate bytes that are missing in archive or are
348 * corrupted. */
349 uint64_t rr_offset;
350
351 /* Various context variables grouped to different structures. */
352 struct generic_header generic;
353 struct main_header main;
354 struct comp_state cstate;
355 struct file_header file;
356 struct bit_reader bits;
357 struct multivolume vol;
358
359 /* The header of currently processed RARv5 block. Used in main
360 * decompression logic loop. */
361 struct compressed_block_header last_block_hdr;
362
363 /*
364 * Custom field to denote that this archive contains encrypted entries
365 */
366 int has_encrypted_entries;
367 int headers_are_encrypted;
368 };
369
/* Prototypes of functions that are used before their definition. */

static void rar5_signature(char *buf);
static int verify_global_checksums(struct archive_read *a);
static int rar5_read_data_skip(struct archive_read *a);
static int push_data_ready(struct archive_read *a, struct rar5 *rar,
    const uint8_t *buf, size_t size, int64_t offset);
static void clear_data_ready_stack(struct rar5 *rar);
378
/* Return values of the cdeque_* functions
 * (CDE = Circular Double Ended queue). */
enum CDE_RETURN_VALUES {
	CDE_OK,
	CDE_ALLOC,
	CDE_PARAM,
	CDE_OUT_OF_BOUNDS,
};
383
384 /* Clears the contents of this circular deque. */
cdeque_clear(struct cdeque * d)385 static void cdeque_clear(struct cdeque* d) {
386 d->size = 0;
387 d->beg_pos = 0;
388 d->end_pos = 0;
389 }
390
391 /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32,
392 * 64, 256, etc. When the user will add another item above current capacity,
393 * the circular deque will overwrite the oldest entry. */
cdeque_init(struct cdeque * d,int max_capacity_power_of_2)394 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) {
395 if(d == NULL || max_capacity_power_of_2 == 0)
396 return CDE_PARAM;
397
398 d->cap_mask = max_capacity_power_of_2 - 1;
399 d->arr = NULL;
400
401 if((max_capacity_power_of_2 & d->cap_mask) != 0)
402 return CDE_PARAM;
403
404 cdeque_clear(d);
405 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2);
406
407 return d->arr ? CDE_OK : CDE_ALLOC;
408 }
409
410 /* Return the current size (not capacity) of circular deque `d`. */
cdeque_size(struct cdeque * d)411 static size_t cdeque_size(struct cdeque* d) {
412 return d->size;
413 }
414
415 /* Returns the first element of current circular deque. Note that this function
416 * doesn't perform any bounds checking. If you need bounds checking, use
417 * `cdeque_front()` function instead. */
cdeque_front_fast(struct cdeque * d,void ** value)418 static void cdeque_front_fast(struct cdeque* d, void** value) {
419 *value = (void*) d->arr[d->beg_pos];
420 }
421
422 /* Returns the first element of current circular deque. This function
423 * performs bounds checking. */
cdeque_front(struct cdeque * d,void ** value)424 static int cdeque_front(struct cdeque* d, void** value) {
425 if(d->size > 0) {
426 cdeque_front_fast(d, value);
427 return CDE_OK;
428 } else
429 return CDE_OUT_OF_BOUNDS;
430 }
431
432 /* Pushes a new element into the end of this circular deque object. If current
433 * size will exceed capacity, the oldest element will be overwritten. */
cdeque_push_back(struct cdeque * d,void * item)434 static int cdeque_push_back(struct cdeque* d, void* item) {
435 if(d == NULL)
436 return CDE_PARAM;
437
438 if(d->size == d->cap_mask + 1)
439 return CDE_OUT_OF_BOUNDS;
440
441 d->arr[d->end_pos] = (size_t) item;
442 d->end_pos = (d->end_pos + 1) & d->cap_mask;
443 d->size++;
444
445 return CDE_OK;
446 }
447
448 /* Pops a front element of this circular deque object and returns its value.
449 * This function doesn't perform any bounds checking. */
cdeque_pop_front_fast(struct cdeque * d,void ** value)450 static void cdeque_pop_front_fast(struct cdeque* d, void** value) {
451 *value = (void*) d->arr[d->beg_pos];
452 d->beg_pos = (d->beg_pos + 1) & d->cap_mask;
453 d->size--;
454 }
455
456 /* Pops a front element of this circular deque object and returns its value.
457 * This function performs bounds checking. */
cdeque_pop_front(struct cdeque * d,void ** value)458 static int cdeque_pop_front(struct cdeque* d, void** value) {
459 if(!d || !value)
460 return CDE_PARAM;
461
462 if(d->size == 0)
463 return CDE_OUT_OF_BOUNDS;
464
465 cdeque_pop_front_fast(d, value);
466 return CDE_OK;
467 }
468
/* Casts a filter_info** to void**, so that it can be handed to the cdeque
 * functions. The pointer goes through size_t on its way. */
static void **cdeque_filter_p(struct filter_info **f)
{
	const size_t raw = (size_t) f;

	return (void **) raw;
}
473
/* Casts a filter_info* to void*, so that it can be stored in a cdeque. The
 * pointer goes through size_t on its way. */
static void *cdeque_filter(struct filter_info *f)
{
	const size_t raw = (size_t) f;

	return (void *) raw;
}
478
479 /* Destroys this circular deque object. Deallocates the memory of the
480 * collection buffer, but doesn't deallocate the memory of any pointer passed
481 * to this deque as a value. */
cdeque_free(struct cdeque * d)482 static void cdeque_free(struct cdeque* d) {
483 if(!d)
484 return;
485
486 if(!d->arr)
487 return;
488
489 free(d->arr);
490
491 d->arr = NULL;
492 d->beg_pos = -1;
493 d->end_pos = -1;
494 d->cap_mask = 0;
495 }
496
497 static inline
bf_bit_size(const struct compressed_block_header * hdr)498 uint8_t bf_bit_size(const struct compressed_block_header* hdr) {
499 return hdr->block_flags_u8 & 7;
500 }
501
502 static inline
bf_byte_count(const struct compressed_block_header * hdr)503 uint8_t bf_byte_count(const struct compressed_block_header* hdr) {
504 return (hdr->block_flags_u8 >> 3) & 7;
505 }
506
507 static inline
bf_is_table_present(const struct compressed_block_header * hdr)508 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) {
509 return (hdr->block_flags_u8 >> 7) & 1;
510 }
511
512 static inline
bf_is_last_block(const struct compressed_block_header * hdr)513 uint8_t bf_is_last_block(const struct compressed_block_header* hdr) {
514 return (hdr->block_flags_u8 >> 6) & 1;
515 }
516
get_context(struct archive_read * a)517 static inline struct rar5* get_context(struct archive_read* a) {
518 return (struct rar5*) a->format->data;
519 }
520
/* Convenience functions used by filter implementations. */

/* Copies the range [start, end) of the circular buffer `window` into the
 * linear buffer `dst`. `mask` is applied to both offsets. When the masked
 * start lies behind the masked end, the range wraps around and is copied in
 * two pieces: the tail of the window first, then its beginning. */
static void circular_memcpy(uint8_t *dst, uint8_t *window, const ssize_t mask,
    int64_t start, int64_t end)
{
	const int64_t first = start & mask;
	const int64_t last = end & mask;
	ssize_t tail_len, head_len;

	if (first <= last) {
		/* No wrap: a single copy is enough. */
		memcpy(dst, &window[first], (size_t) (end - start));
		return;
	}

	tail_len = mask + 1 - (start & mask);
	head_len = end & mask;

	memcpy(dst, &window[first], tail_len);
	memcpy(dst + tail_len, window, head_len);
}
535
read_filter_data(struct rar5 * rar,uint32_t offset)536 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) {
537 uint8_t linear_buf[4];
538 circular_memcpy(linear_buf, rar->cstate.window_buf,
539 rar->cstate.window_mask, offset, offset + 4);
540 return archive_le32dec(linear_buf);
541 }
542
write_filter_data(struct rar5 * rar,uint32_t offset,uint32_t value)543 static void write_filter_data(struct rar5* rar, uint32_t offset,
544 uint32_t value)
545 {
546 archive_le32enc(&rar->cstate.filtered_buf[offset], value);
547 }
548
549 /* Allocates a new filter descriptor and adds it to the filter array. */
add_new_filter(struct rar5 * rar)550 static struct filter_info* add_new_filter(struct rar5* rar) {
551 struct filter_info* f = calloc(1, sizeof(*f));
552
553 if(!f) {
554 return NULL;
555 }
556
557 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f));
558 return f;
559 }
560
run_delta_filter(struct rar5 * rar,struct filter_info * flt)561 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) {
562 int i;
563 ssize_t dest_pos, src_pos = 0;
564
565 for(i = 0; i < flt->channels; i++) {
566 uint8_t prev_byte = 0;
567 for(dest_pos = i;
568 dest_pos < flt->block_length;
569 dest_pos += flt->channels)
570 {
571 uint8_t byte;
572
573 byte = rar->cstate.window_buf[
574 (rar->cstate.solid_offset + flt->block_start +
575 src_pos) & rar->cstate.window_mask];
576
577 prev_byte -= byte;
578 rar->cstate.filtered_buf[dest_pos] = prev_byte;
579 src_pos++;
580 }
581 }
582
583 return ARCHIVE_OK;
584 }
585
run_e8e9_filter(struct rar5 * rar,struct filter_info * flt,int extended)586 static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt,
587 int extended)
588 {
589 const uint32_t file_size = 0x1000000;
590 ssize_t i;
591
592 circular_memcpy(rar->cstate.filtered_buf,
593 rar->cstate.window_buf, rar->cstate.window_mask,
594 rar->cstate.solid_offset + flt->block_start,
595 rar->cstate.solid_offset + flt->block_start + flt->block_length);
596
597 for(i = 0; i < flt->block_length - 4;) {
598 uint8_t b = rar->cstate.window_buf[
599 (rar->cstate.solid_offset + flt->block_start +
600 i++) & rar->cstate.window_mask];
601
602 /*
603 * 0xE8 = x86's call <relative_addr_uint32> (function call)
604 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump)
605 */
606 if(b == 0xE8 || (extended && b == 0xE9)) {
607
608 uint32_t addr;
609 uint32_t offset = (i + flt->block_start) % file_size;
610
611 addr = read_filter_data(rar,
612 (uint32_t)(rar->cstate.solid_offset +
613 flt->block_start + i) & rar->cstate.window_mask);
614
615 if(addr & 0x80000000) {
616 if(((addr + offset) & 0x80000000) == 0) {
617 write_filter_data(rar, (uint32_t)i,
618 addr + file_size);
619 }
620 } else {
621 if((addr - file_size) & 0x80000000) {
622 uint32_t naddr = addr - offset;
623 write_filter_data(rar, (uint32_t)i,
624 naddr);
625 }
626 }
627
628 i += 4;
629 }
630 }
631
632 return ARCHIVE_OK;
633 }
634
run_arm_filter(struct rar5 * rar,struct filter_info * flt)635 static int run_arm_filter(struct rar5* rar, struct filter_info* flt) {
636 ssize_t i = 0;
637 uint32_t offset;
638
639 circular_memcpy(rar->cstate.filtered_buf,
640 rar->cstate.window_buf, rar->cstate.window_mask,
641 rar->cstate.solid_offset + flt->block_start,
642 rar->cstate.solid_offset + flt->block_start + flt->block_length);
643
644 for(i = 0; i < flt->block_length - 3; i += 4) {
645 uint8_t* b = &rar->cstate.window_buf[
646 (rar->cstate.solid_offset +
647 flt->block_start + i + 3) & rar->cstate.window_mask];
648
649 if(*b == 0xEB) {
650 /* 0xEB = ARM's BL (branch + link) instruction. */
651 offset = read_filter_data(rar,
652 (rar->cstate.solid_offset + flt->block_start + i) &
653 (uint32_t)rar->cstate.window_mask) & 0x00ffffff;
654
655 offset -= (uint32_t) ((i + flt->block_start) / 4);
656 offset = (offset & 0x00ffffff) | 0xeb000000;
657 write_filter_data(rar, (uint32_t)i, offset);
658 }
659 }
660
661 return ARCHIVE_OK;
662 }
663
run_filter(struct archive_read * a,struct filter_info * flt)664 static int run_filter(struct archive_read* a, struct filter_info* flt) {
665 int ret;
666 struct rar5* rar = get_context(a);
667
668 clear_data_ready_stack(rar);
669 free(rar->cstate.filtered_buf);
670
671 rar->cstate.filtered_buf = malloc(flt->block_length);
672 if(!rar->cstate.filtered_buf) {
673 archive_set_error(&a->archive, ENOMEM,
674 "Can't allocate memory for filter data.");
675 return ARCHIVE_FATAL;
676 }
677
678 switch(flt->type) {
679 case FILTER_DELTA:
680 ret = run_delta_filter(rar, flt);
681 break;
682
683 case FILTER_E8:
684 /* fallthrough */
685 case FILTER_E8E9:
686 ret = run_e8e9_filter(rar, flt,
687 flt->type == FILTER_E8E9);
688 break;
689
690 case FILTER_ARM:
691 ret = run_arm_filter(rar, flt);
692 break;
693
694 default:
695 archive_set_error(&a->archive,
696 ARCHIVE_ERRNO_FILE_FORMAT,
697 "Unsupported filter type: 0x%x",
698 (unsigned int)flt->type);
699 return ARCHIVE_FATAL;
700 }
701
702 if(ret != ARCHIVE_OK) {
703 /* Filter has failed. */
704 return ret;
705 }
706
707 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf,
708 flt->block_length, rar->cstate.last_write_ptr))
709 {
710 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
711 "Stack overflow when submitting unpacked data");
712
713 return ARCHIVE_FATAL;
714 }
715
716 rar->cstate.last_write_ptr += flt->block_length;
717 return ARCHIVE_OK;
718 }
719
720 /* The `push_data` function submits the selected data range to the user.
721 * Next call of `use_data` will use the pointer, size and offset arguments
722 * that are specified here. These arguments are pushed to the FIFO stack here,
723 * and popped from the stack by the `use_data` function. */
push_data(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,int64_t idx_begin,int64_t idx_end)724 static void push_data(struct archive_read* a, struct rar5* rar,
725 const uint8_t* buf, int64_t idx_begin, int64_t idx_end)
726 {
727 const ssize_t wmask = rar->cstate.window_mask;
728 const ssize_t solid_write_ptr = (rar->cstate.solid_offset +
729 rar->cstate.last_write_ptr) & wmask;
730
731 idx_begin += rar->cstate.solid_offset;
732 idx_end += rar->cstate.solid_offset;
733
734 /* Check if our unpacked data is wrapped inside the window circular
735 * buffer. If it's not wrapped, it can be copied out by using
736 * a single memcpy, but when it's wrapped, we need to copy the first
737 * part with one memcpy, and the second part with another memcpy. */
738
739 if((idx_begin & wmask) > (idx_end & wmask)) {
740 /* The data is wrapped (begin offset sis bigger than end
741 * offset). */
742 const ssize_t frag1_size = rar->cstate.window_size -
743 (idx_begin & wmask);
744 const ssize_t frag2_size = idx_end & wmask;
745
746 /* Copy the first part of the buffer first. */
747 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size,
748 rar->cstate.last_write_ptr);
749
750 /* Copy the second part of the buffer. */
751 push_data_ready(a, rar, buf, frag2_size,
752 rar->cstate.last_write_ptr + frag1_size);
753
754 rar->cstate.last_write_ptr += frag1_size + frag2_size;
755 } else {
756 /* Data is not wrapped, so we can just use one call to copy the
757 * data. */
758 push_data_ready(a, rar,
759 buf + solid_write_ptr, (idx_end - idx_begin) & wmask,
760 rar->cstate.last_write_ptr);
761
762 rar->cstate.last_write_ptr += idx_end - idx_begin;
763 }
764 }
765
766 /* Convenience function that submits the data to the user. It uses the
767 * unpack window buffer as a source location. */
push_window_data(struct archive_read * a,struct rar5 * rar,int64_t idx_begin,int64_t idx_end)768 static void push_window_data(struct archive_read* a, struct rar5* rar,
769 int64_t idx_begin, int64_t idx_end)
770 {
771 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end);
772 }
773
apply_filters(struct archive_read * a)774 static int apply_filters(struct archive_read* a) {
775 struct filter_info* flt;
776 struct rar5* rar = get_context(a);
777 int ret;
778
779 rar->cstate.all_filters_applied = 0;
780
781 /* Get the first filter that can be applied to our data. The data
782 * needs to be fully unpacked before the filter can be run. */
783 if(CDE_OK == cdeque_front(&rar->cstate.filters,
784 cdeque_filter_p(&flt))) {
785 /* Check if our unpacked data fully covers this filter's
786 * range. */
787 if(rar->cstate.write_ptr > flt->block_start &&
788 rar->cstate.write_ptr >= flt->block_start +
789 flt->block_length) {
790 /* Check if we have some data pending to be written
791 * right before the filter's start offset. */
792 if(rar->cstate.last_write_ptr == flt->block_start) {
793 /* Run the filter specified by descriptor
794 * `flt`. */
795 ret = run_filter(a, flt);
796 if(ret != ARCHIVE_OK) {
797 /* Filter failure, return error. */
798 return ret;
799 }
800
801 /* Filter descriptor won't be needed anymore
802 * after it's used, * so remove it from the
803 * filter list and free its memory. */
804 (void) cdeque_pop_front(&rar->cstate.filters,
805 cdeque_filter_p(&flt));
806
807 free(flt);
808 } else {
809 /* We can't run filters yet, dump the memory
810 * right before the filter. */
811 push_window_data(a, rar,
812 rar->cstate.last_write_ptr,
813 flt->block_start);
814 }
815
816 /* Return 'filter applied or not needed' state to the
817 * caller. */
818 return ARCHIVE_RETRY;
819 }
820 }
821
822 rar->cstate.all_filters_applied = 1;
823 return ARCHIVE_OK;
824 }
825
dist_cache_push(struct rar5 * rar,int value)826 static void dist_cache_push(struct rar5* rar, int value) {
827 int* q = rar->cstate.dist_cache;
828
829 q[3] = q[2];
830 q[2] = q[1];
831 q[1] = q[0];
832 q[0] = value;
833 }
834
dist_cache_touch(struct rar5 * rar,int idx)835 static int dist_cache_touch(struct rar5* rar, int idx) {
836 int* q = rar->cstate.dist_cache;
837 int i, dist = q[idx];
838
839 for(i = idx; i > 0; i--)
840 q[i] = q[i - 1];
841
842 q[0] = dist;
843 return dist;
844 }
845
free_filters(struct rar5 * rar)846 static void free_filters(struct rar5* rar) {
847 struct cdeque* d = &rar->cstate.filters;
848
849 /* Free any remaining filters. All filters should be naturally
850 * consumed by the unpacking function, so remaining filters after
851 * unpacking normally mean that unpacking wasn't successful.
852 * But still of course we shouldn't leak memory in such case. */
853
854 /* cdeque_size() is a fast operation, so we can use it as a loop
855 * expression. */
856 while(cdeque_size(d) > 0) {
857 struct filter_info* f = NULL;
858
859 /* Pop_front will also decrease the collection's size. */
860 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)))
861 free(f);
862 }
863
864 cdeque_clear(d);
865
866 /* Also clear out the variables needed for sanity checking. */
867 rar->cstate.last_block_start = 0;
868 rar->cstate.last_block_length = 0;
869 }
870
reset_file_context(struct rar5 * rar)871 static void reset_file_context(struct rar5* rar) {
872 memset(&rar->file, 0, sizeof(rar->file));
873 blake2sp_init(&rar->file.b2state, 32);
874
875 if(rar->main.solid) {
876 rar->cstate.solid_offset += rar->cstate.write_ptr;
877 } else {
878 rar->cstate.solid_offset = 0;
879 }
880
881 rar->cstate.write_ptr = 0;
882 rar->cstate.last_write_ptr = 0;
883 rar->cstate.last_unstore_ptr = 0;
884
885 rar->file.redir_type = REDIR_TYPE_NONE;
886 rar->file.redir_flags = 0;
887
888 free_filters(rar);
889 }
890
get_archive_read(struct archive * a,struct archive_read ** ar)891 static inline int get_archive_read(struct archive* a,
892 struct archive_read** ar)
893 {
894 *ar = (struct archive_read*) a;
895 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
896 "archive_read_support_format_rar5");
897
898 return ARCHIVE_OK;
899 }
900
/* Requests `how_many` bytes through __archive_read_ahead() and stores the
 * returned pointer in `*ptr`. Returns 1 when the pointer is usable, 0 when
 * `ptr` is NULL or the request fails (`*ptr` is NULL then). */
static int read_ahead(struct archive_read *a, size_t how_many,
    const uint8_t **ptr)
{
	ssize_t avail = -1;

	if (ptr == NULL)
		return 0;

	*ptr = __archive_read_ahead(a, how_many, &avail);

	return (*ptr != NULL) ? 1 : 0;
}
915
consume(struct archive_read * a,int64_t how_many)916 static int consume(struct archive_read* a, int64_t how_many) {
917 int ret;
918
919 ret = how_many == __archive_read_consume(a, how_many)
920 ? ARCHIVE_OK
921 : ARCHIVE_FATAL;
922
923 return ret;
924 }
925
926 /**
927 * Read a RAR5 variable sized numeric value. This value will be stored in
928 * `pvalue`. The `pvalue_len` argument points to a variable that will receive
929 * the byte count that was consumed in order to decode the `pvalue` value, plus
930 * one.
931 *
932 * pvalue_len is optional and can be NULL.
933 *
934 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume
935 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len`
936 * is NULL, this consuming operation is done automatically.
937 *
938 * Returns 1 if *pvalue was successfully read.
939 * Returns 0 if there was an error. In this case, *pvalue contains an
940 * invalid value.
941 */
942
read_var(struct archive_read * a,uint64_t * pvalue,uint64_t * pvalue_len)943 static int read_var(struct archive_read* a, uint64_t* pvalue,
944 uint64_t* pvalue_len)
945 {
946 uint64_t result = 0;
947 size_t shift, i;
948 const uint8_t* p;
949 uint8_t b;
950
951 /* We will read maximum of 8 bytes. We don't have to handle the
952 * situation to read the RAR5 variable-sized value stored at the end of
953 * the file, because such situation will never happen. */
954 if(!read_ahead(a, 8, &p))
955 return 0;
956
957 for(shift = 0, i = 0; i < 8; i++, shift += 7) {
958 b = p[i];
959
960 /* Strip the MSB from the input byte and add the resulting
961 * number to the `result`. */
962 result += (b & (uint64_t)0x7F) << shift;
963
964 /* MSB set to 1 means we need to continue decoding process.
965 * MSB set to 0 means we're done.
966 *
967 * This conditional checks for the second case. */
968 if((b & 0x80) == 0) {
969 if(pvalue) {
970 *pvalue = result;
971 }
972
973 /* If the caller has passed the `pvalue_len` pointer,
974 * store the number of consumed bytes in it and do NOT
975 * consume those bytes, since the caller has all the
976 * information it needs to perform */
977 if(pvalue_len) {
978 *pvalue_len = 1 + i;
979 } else {
980 /* If the caller did not provide the
981 * `pvalue_len` pointer, it will not have the
982 * possibility to advance the file pointer,
983 * because it will not know how many bytes it
984 * needs to consume. This is why we handle
985 * such situation here automatically. */
986 if(ARCHIVE_OK != consume(a, 1 + i)) {
987 return 0;
988 }
989 }
990
991 /* End of decoding process, return success. */
992 return 1;
993 }
994 }
995
996 /* The decoded value takes the maximum number of 8 bytes.
997 * It's a maximum number of bytes, so end decoding process here
998 * even if the first bit of last byte is 1. */
999 if(pvalue) {
1000 *pvalue = result;
1001 }
1002
1003 if(pvalue_len) {
1004 *pvalue_len = 9;
1005 } else {
1006 if(ARCHIVE_OK != consume(a, 9)) {
1007 return 0;
1008 }
1009 }
1010
1011 return 1;
1012 }
1013
/**
 * size_t flavour of read_var().
 *
 * Decodes a variable-length integer and stores it, narrowed to size_t, in
 * `*pvalue` (only on success, and only if `pvalue` is not NULL).  When
 * `pvalue_len` is not NULL it always receives the encoded length reported by
 * read_var() (0 if decoding failed) and the caller has to consume the bytes;
 * otherwise read_var() consumes them.
 *
 * Returns the result of read_var(): 1 on success, 0 on failure.
 */
static int read_var_sized(struct archive_read* a, size_t* pvalue,
    size_t* pvalue_len)
{
	uint64_t decoded;
	uint64_t encoded_len = 0;
	int ok;

	if(pvalue_len != NULL)
		ok = read_var(a, &decoded, &encoded_len);
	else
		ok = read_var(a, &decoded, NULL);

	if(pvalue != NULL && ok == 1)
		*pvalue = (size_t) decoded;

	/* Possible data truncation should be safe. */
	if(pvalue_len != NULL)
		*pvalue_len = (size_t) encoded_len;

	return ok;
}
1034
read_bits_32(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * value)1035 static int read_bits_32(struct archive_read* a, struct rar5* rar,
1036 const uint8_t* p, uint32_t* value)
1037 {
1038 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1039 archive_set_error(&a->archive,
1040 ARCHIVE_ERRNO_PROGRAMMER,
1041 "Premature end of stream during extraction of data (#1)");
1042 return ARCHIVE_FATAL;
1043 }
1044
1045 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24;
1046 bits |= p[rar->bits.in_addr + 1] << 16;
1047 bits |= p[rar->bits.in_addr + 2] << 8;
1048 bits |= p[rar->bits.in_addr + 3];
1049 bits <<= rar->bits.bit_addr;
1050 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr);
1051 *value = bits;
1052 return ARCHIVE_OK;
1053 }
1054
read_bits_16(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t * value)1055 static int read_bits_16(struct archive_read* a, struct rar5* rar,
1056 const uint8_t* p, uint16_t* value)
1057 {
1058 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1059 archive_set_error(&a->archive,
1060 ARCHIVE_ERRNO_PROGRAMMER,
1061 "Premature end of stream during extraction of data (#2)");
1062 return ARCHIVE_FATAL;
1063 }
1064
1065 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16;
1066 bits |= (int) p[rar->bits.in_addr + 1] << 8;
1067 bits |= (int) p[rar->bits.in_addr + 2];
1068 bits >>= (8 - rar->bits.bit_addr);
1069 *value = bits & 0xffff;
1070 return ARCHIVE_OK;
1071 }
1072
skip_bits(struct rar5 * rar,int bits)1073 static void skip_bits(struct rar5* rar, int bits) {
1074 const int new_bits = rar->bits.bit_addr + bits;
1075 rar->bits.in_addr += new_bits >> 3;
1076 rar->bits.bit_addr = new_bits & 7;
1077 }
1078
1079 /* n = up to 16 */
read_consume_bits(struct archive_read * a,struct rar5 * rar,const uint8_t * p,int n,int * value)1080 static int read_consume_bits(struct archive_read* a, struct rar5* rar,
1081 const uint8_t* p, int n, int* value)
1082 {
1083 uint16_t v;
1084 int ret, num;
1085
1086 if(n == 0 || n > 16) {
1087 /* This is a programmer error and should never happen
1088 * in runtime. */
1089 return ARCHIVE_FATAL;
1090 }
1091
1092 ret = read_bits_16(a, rar, p, &v);
1093 if(ret != ARCHIVE_OK)
1094 return ret;
1095
1096 num = (int) v;
1097 num >>= 16 - n;
1098
1099 skip_bits(rar, n);
1100
1101 if(value)
1102 *value = num;
1103
1104 return ARCHIVE_OK;
1105 }
1106
read_u32(struct archive_read * a,uint32_t * pvalue)1107 static char read_u32(struct archive_read* a, uint32_t* pvalue) {
1108 const uint8_t* p;
1109 if(!read_ahead(a, 4, &p))
1110 return 0;
1111
1112 *pvalue = archive_le32dec(p);
1113 return ARCHIVE_OK == consume(a, 4);
1114 }
1115
read_u64(struct archive_read * a,uint64_t * pvalue)1116 static char read_u64(struct archive_read* a, uint64_t* pvalue) {
1117 const uint8_t* p;
1118 if(!read_ahead(a, 8, &p))
1119 return 0;
1120
1121 *pvalue = archive_le64dec(p);
1122 return ARCHIVE_OK == consume(a, 8);
1123 }
1124
bid_standard(struct archive_read * a)1125 static int bid_standard(struct archive_read* a) {
1126 const uint8_t* p;
1127 char signature[sizeof(rar5_signature_xor)];
1128
1129 rar5_signature(signature);
1130
1131 if(!read_ahead(a, sizeof(rar5_signature_xor), &p))
1132 return -1;
1133
1134 if(!memcmp(signature, p, sizeof(rar5_signature_xor)))
1135 return 30;
1136
1137 return -1;
1138 }
1139
bid_sfx(struct archive_read * a)1140 static int bid_sfx(struct archive_read *a)
1141 {
1142 const char *p;
1143
1144 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
1145 return -1;
1146
1147 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) {
1148 /* This is a PE file */
1149 char signature[sizeof(rar5_signature_xor)];
1150 ssize_t offset = 0x10000;
1151 ssize_t window = 4096;
1152 ssize_t bytes_avail;
1153
1154 rar5_signature(signature);
1155
1156 while (offset + window <= (1024 * 512)) {
1157 const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail);
1158 if (buff == NULL) {
1159 /* Remaining bytes are less than window. */
1160 window >>= 1;
1161 if (window < 0x40)
1162 return 0;
1163 continue;
1164 }
1165 p = buff + offset;
1166 while (p + 8 < buff + bytes_avail) {
1167 if (memcmp(p, signature, sizeof(signature)) == 0)
1168 return 30;
1169 p += 0x10;
1170 }
1171 offset = p - buff;
1172 }
1173 }
1174
1175 return 0;
1176 }
1177
/* Format bidder: declines (-1) when another bidder already offered more
 * than 30; otherwise tries the plain signature first and the
 * self-extracting layout second, returning the first non-negative bid. */
static int rar5_bid(struct archive_read* a, int best_bid) {
	int bid;

	if(best_bid > 30)
		return -1;

	bid = bid_standard(a);
	if(bid < 0)
		bid = bid_sfx(a);

	return bid > -1 ? bid : -1;
}
1195
rar5_options(struct archive_read * a,const char * key,const char * val)1196 static int rar5_options(struct archive_read *a, const char *key,
1197 const char *val) {
1198 (void) a;
1199 (void) key;
1200 (void) val;
1201
1202 /* No options supported in this version. Return the ARCHIVE_WARN code
1203 * to signal the options supervisor that the unpacker didn't handle
1204 * setting this option. */
1205
1206 return ARCHIVE_WARN;
1207 }
1208
init_header(struct archive_read * a)1209 static void init_header(struct archive_read* a) {
1210 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5;
1211 a->archive.archive_format_name = "RAR5";
1212 }
1213
init_window_mask(struct rar5 * rar)1214 static void init_window_mask(struct rar5* rar) {
1215 if (rar->cstate.window_size)
1216 rar->cstate.window_mask = rar->cstate.window_size - 1;
1217 else
1218 rar->cstate.window_mask = 0;
1219 }
1220
/* Bit flags carried by a block header (tested against `block_flags`). */
enum HEADER_FLAGS {
	HFL_EXTRA_DATA      = 1 << 0,
	HFL_DATA            = 1 << 1,
	HFL_SKIP_IF_UNKNOWN = 1 << 2,
	HFL_SPLIT_BEFORE    = 1 << 3,
	HFL_SPLIT_AFTER     = 1 << 4,
	HFL_CHILD           = 1 << 5,
	HFL_INHERITED       = 1 << 6
};
1230
process_main_locator_extra_block(struct archive_read * a,struct rar5 * rar)1231 static int process_main_locator_extra_block(struct archive_read* a,
1232 struct rar5* rar)
1233 {
1234 uint64_t locator_flags;
1235
1236 enum LOCATOR_FLAGS {
1237 QLIST = 0x01, RECOVERY = 0x02,
1238 };
1239
1240 if(!read_var(a, &locator_flags, NULL)) {
1241 return ARCHIVE_EOF;
1242 }
1243
1244 if(locator_flags & QLIST) {
1245 if(!read_var(a, &rar->qlist_offset, NULL)) {
1246 return ARCHIVE_EOF;
1247 }
1248
1249 /* qlist is not used */
1250 }
1251
1252 if(locator_flags & RECOVERY) {
1253 if(!read_var(a, &rar->rr_offset, NULL)) {
1254 return ARCHIVE_EOF;
1255 }
1256
1257 /* rr is not used */
1258 }
1259
1260 return ARCHIVE_OK;
1261 }
1262
parse_file_extra_hash(struct archive_read * a,struct rar5 * rar,int64_t * extra_data_size)1263 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar,
1264 int64_t* extra_data_size)
1265 {
1266 size_t hash_type = 0;
1267 size_t value_len;
1268
1269 enum HASH_TYPE {
1270 BLAKE2sp = 0x00
1271 };
1272
1273 if(!read_var_sized(a, &hash_type, &value_len))
1274 return ARCHIVE_EOF;
1275
1276 *extra_data_size -= value_len;
1277 if(ARCHIVE_OK != consume(a, value_len)) {
1278 return ARCHIVE_EOF;
1279 }
1280
1281 /* The file uses BLAKE2sp checksum algorithm instead of plain old
1282 * CRC32. */
1283 if(hash_type == BLAKE2sp) {
1284 const uint8_t* p;
1285 const int hash_size = sizeof(rar->file.blake2sp);
1286
1287 if(!read_ahead(a, hash_size, &p))
1288 return ARCHIVE_EOF;
1289
1290 rar->file.has_blake2 = 1;
1291 memcpy(&rar->file.blake2sp, p, hash_size);
1292
1293 if(ARCHIVE_OK != consume(a, hash_size)) {
1294 return ARCHIVE_EOF;
1295 }
1296
1297 *extra_data_size -= hash_size;
1298 } else {
1299 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1300 "Unsupported hash type (0x%jx)", (uintmax_t)hash_type);
1301 return ARCHIVE_FATAL;
1302 }
1303
1304 return ARCHIVE_OK;
1305 }
1306
parse_htime_item(struct archive_read * a,char unix_time,int64_t * sec,uint32_t * nsec,int64_t * extra_data_size)1307 static int parse_htime_item(struct archive_read* a, char unix_time,
1308 int64_t* sec, uint32_t* nsec, int64_t* extra_data_size)
1309 {
1310 if(unix_time) {
1311 uint32_t time_val;
1312 if(!read_u32(a, &time_val))
1313 return ARCHIVE_EOF;
1314
1315 *extra_data_size -= 4;
1316 *sec = (int64_t) time_val;
1317 } else {
1318 uint64_t windows_time;
1319 if(!read_u64(a, &windows_time))
1320 return ARCHIVE_EOF;
1321
1322 ntfs_to_unix(windows_time, sec, nsec);
1323 *extra_data_size -= 8;
1324 }
1325
1326 return ARCHIVE_OK;
1327 }
1328
parse_file_extra_version(struct archive_read * a,struct archive_entry * e,int64_t * extra_data_size)1329 static int parse_file_extra_version(struct archive_read* a,
1330 struct archive_entry* e, int64_t* extra_data_size)
1331 {
1332 size_t flags = 0;
1333 size_t version = 0;
1334 size_t value_len = 0;
1335 struct archive_string version_string;
1336 struct archive_string name_utf8_string;
1337 const char* cur_filename;
1338
1339 /* Flags are ignored. */
1340 if(!read_var_sized(a, &flags, &value_len))
1341 return ARCHIVE_EOF;
1342
1343 *extra_data_size -= value_len;
1344 if(ARCHIVE_OK != consume(a, value_len))
1345 return ARCHIVE_EOF;
1346
1347 if(!read_var_sized(a, &version, &value_len))
1348 return ARCHIVE_EOF;
1349
1350 *extra_data_size -= value_len;
1351 if(ARCHIVE_OK != consume(a, value_len))
1352 return ARCHIVE_EOF;
1353
1354 /* extra_data_size should be zero here. */
1355
1356 cur_filename = archive_entry_pathname_utf8(e);
1357 if(cur_filename == NULL) {
1358 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1359 "Version entry without file name");
1360 return ARCHIVE_FATAL;
1361 }
1362
1363 archive_string_init(&version_string);
1364 archive_string_init(&name_utf8_string);
1365
1366 /* Prepare a ;123 suffix for the filename, where '123' is the version
1367 * value of this file. */
1368 archive_string_sprintf(&version_string, ";%zu", version);
1369
1370 /* Build the new filename. */
1371 archive_strcat(&name_utf8_string, cur_filename);
1372 archive_strcat(&name_utf8_string, version_string.s);
1373
1374 /* Apply the new filename into this file's context. */
1375 archive_entry_update_pathname_utf8(e, name_utf8_string.s);
1376
1377 /* Free buffers. */
1378 archive_string_free(&version_string);
1379 archive_string_free(&name_utf8_string);
1380 return ARCHIVE_OK;
1381 }
1382
parse_file_extra_htime(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t * extra_data_size)1383 static int parse_file_extra_htime(struct archive_read* a,
1384 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
1385 {
1386 char unix_time, has_unix_ns, has_mtime, has_ctime, has_atime;
1387 size_t flags = 0;
1388 size_t value_len;
1389
1390 enum HTIME_FLAGS {
1391 IS_UNIX = 0x01,
1392 HAS_MTIME = 0x02,
1393 HAS_CTIME = 0x04,
1394 HAS_ATIME = 0x08,
1395 HAS_UNIX_NS = 0x10,
1396 };
1397
1398 if(!read_var_sized(a, &flags, &value_len))
1399 return ARCHIVE_EOF;
1400
1401 *extra_data_size -= value_len;
1402 if(ARCHIVE_OK != consume(a, value_len)) {
1403 return ARCHIVE_EOF;
1404 }
1405
1406 unix_time = flags & IS_UNIX;
1407 has_unix_ns = unix_time && (flags & HAS_UNIX_NS);
1408 has_mtime = flags & HAS_MTIME;
1409 has_atime = flags & HAS_ATIME;
1410 has_ctime = flags & HAS_CTIME;
1411 rar->file.e_atime_ns = rar->file.e_ctime_ns = rar->file.e_mtime_ns = 0;
1412
1413 if(has_mtime) {
1414 parse_htime_item(a, unix_time, &rar->file.e_mtime,
1415 &rar->file.e_mtime_ns, extra_data_size);
1416 }
1417
1418 if(has_ctime) {
1419 parse_htime_item(a, unix_time, &rar->file.e_ctime,
1420 &rar->file.e_ctime_ns, extra_data_size);
1421 }
1422
1423 if(has_atime) {
1424 parse_htime_item(a, unix_time, &rar->file.e_atime,
1425 &rar->file.e_atime_ns, extra_data_size);
1426 }
1427
1428 if(has_mtime && has_unix_ns) {
1429 if(!read_u32(a, &rar->file.e_mtime_ns))
1430 return ARCHIVE_EOF;
1431
1432 *extra_data_size -= 4;
1433 }
1434
1435 if(has_ctime && has_unix_ns) {
1436 if(!read_u32(a, &rar->file.e_ctime_ns))
1437 return ARCHIVE_EOF;
1438
1439 *extra_data_size -= 4;
1440 }
1441
1442 if(has_atime && has_unix_ns) {
1443 if(!read_u32(a, &rar->file.e_atime_ns))
1444 return ARCHIVE_EOF;
1445
1446 *extra_data_size -= 4;
1447 }
1448
1449 /* The seconds and nanoseconds are either together, or separated in two
1450 * fields so we parse them, then set the archive_entry's times. */
1451 if(has_mtime) {
1452 archive_entry_set_mtime(e, rar->file.e_mtime, rar->file.e_mtime_ns);
1453 }
1454
1455 if(has_ctime) {
1456 archive_entry_set_ctime(e, rar->file.e_ctime, rar->file.e_ctime_ns);
1457 }
1458
1459 if(has_atime) {
1460 archive_entry_set_atime(e, rar->file.e_atime, rar->file.e_atime_ns);
1461 }
1462
1463 return ARCHIVE_OK;
1464 }
1465
parse_file_extra_redir(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t * extra_data_size)1466 static int parse_file_extra_redir(struct archive_read* a,
1467 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
1468 {
1469 uint64_t value_size = 0;
1470 size_t target_size = 0;
1471 char target_utf8_buf[MAX_NAME_IN_BYTES];
1472 const uint8_t* p;
1473
1474 if(!read_var(a, &rar->file.redir_type, &value_size))
1475 return ARCHIVE_EOF;
1476 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1477 return ARCHIVE_EOF;
1478 *extra_data_size -= value_size;
1479
1480 if(!read_var(a, &rar->file.redir_flags, &value_size))
1481 return ARCHIVE_EOF;
1482 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1483 return ARCHIVE_EOF;
1484 *extra_data_size -= value_size;
1485
1486 if(!read_var_sized(a, &target_size, NULL))
1487 return ARCHIVE_EOF;
1488 *extra_data_size -= target_size + 1;
1489
1490 if(target_size > (MAX_NAME_IN_CHARS - 1)) {
1491 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1492 "Link target is too long");
1493 return ARCHIVE_FATAL;
1494 }
1495
1496 if(target_size == 0) {
1497 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1498 "No link target specified");
1499 return ARCHIVE_FATAL;
1500 }
1501
1502 if(!read_ahead(a, target_size, &p))
1503 return ARCHIVE_EOF;
1504
1505 memcpy(target_utf8_buf, p, target_size);
1506 target_utf8_buf[target_size] = 0;
1507
1508 if(ARCHIVE_OK != consume(a, (int64_t)target_size))
1509 return ARCHIVE_EOF;
1510
1511 switch(rar->file.redir_type) {
1512 case REDIR_TYPE_UNIXSYMLINK:
1513 case REDIR_TYPE_WINSYMLINK:
1514 archive_entry_set_filetype(e, AE_IFLNK);
1515 archive_entry_update_symlink_utf8(e, target_utf8_buf);
1516 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) {
1517 archive_entry_set_symlink_type(e,
1518 AE_SYMLINK_TYPE_DIRECTORY);
1519 } else {
1520 archive_entry_set_symlink_type(e,
1521 AE_SYMLINK_TYPE_FILE);
1522 }
1523 break;
1524
1525 case REDIR_TYPE_HARDLINK:
1526 archive_entry_set_filetype(e, AE_IFREG);
1527 archive_entry_update_hardlink_utf8(e, target_utf8_buf);
1528 break;
1529
1530 default:
1531 /* Unknown redir type, skip it. */
1532 break;
1533 }
1534 return ARCHIVE_OK;
1535 }
1536
parse_file_extra_owner(struct archive_read * a,struct archive_entry * e,int64_t * extra_data_size)1537 static int parse_file_extra_owner(struct archive_read* a,
1538 struct archive_entry* e, int64_t* extra_data_size)
1539 {
1540 uint64_t flags = 0;
1541 uint64_t value_size = 0;
1542 uint64_t id = 0;
1543 size_t name_len = 0;
1544 size_t name_size = 0;
1545 char namebuf[OWNER_MAXNAMELEN];
1546 const uint8_t* p;
1547
1548 if(!read_var(a, &flags, &value_size))
1549 return ARCHIVE_EOF;
1550 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1551 return ARCHIVE_EOF;
1552 *extra_data_size -= value_size;
1553
1554 if ((flags & OWNER_USER_NAME) != 0) {
1555 if(!read_var_sized(a, &name_size, NULL))
1556 return ARCHIVE_EOF;
1557 *extra_data_size -= name_size + 1;
1558
1559 if(!read_ahead(a, name_size, &p))
1560 return ARCHIVE_EOF;
1561
1562 if (name_size >= OWNER_MAXNAMELEN) {
1563 name_len = OWNER_MAXNAMELEN - 1;
1564 } else {
1565 name_len = name_size;
1566 }
1567
1568 memcpy(namebuf, p, name_len);
1569 namebuf[name_len] = 0;
1570 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1571 return ARCHIVE_EOF;
1572
1573 archive_entry_set_uname(e, namebuf);
1574 }
1575 if ((flags & OWNER_GROUP_NAME) != 0) {
1576 if(!read_var_sized(a, &name_size, NULL))
1577 return ARCHIVE_EOF;
1578 *extra_data_size -= name_size + 1;
1579
1580 if(!read_ahead(a, name_size, &p))
1581 return ARCHIVE_EOF;
1582
1583 if (name_size >= OWNER_MAXNAMELEN) {
1584 name_len = OWNER_MAXNAMELEN - 1;
1585 } else {
1586 name_len = name_size;
1587 }
1588
1589 memcpy(namebuf, p, name_len);
1590 namebuf[name_len] = 0;
1591 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1592 return ARCHIVE_EOF;
1593
1594 archive_entry_set_gname(e, namebuf);
1595 }
1596 if ((flags & OWNER_USER_UID) != 0) {
1597 if(!read_var(a, &id, &value_size))
1598 return ARCHIVE_EOF;
1599 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1600 return ARCHIVE_EOF;
1601 *extra_data_size -= value_size;
1602
1603 archive_entry_set_uid(e, (la_int64_t)id);
1604 }
1605 if ((flags & OWNER_GROUP_GID) != 0) {
1606 if(!read_var(a, &id, &value_size))
1607 return ARCHIVE_EOF;
1608 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1609 return ARCHIVE_EOF;
1610 *extra_data_size -= value_size;
1611
1612 archive_entry_set_gid(e, (la_int64_t)id);
1613 }
1614 return ARCHIVE_OK;
1615 }
1616
process_head_file_extra(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t extra_data_size)1617 static int process_head_file_extra(struct archive_read* a,
1618 struct archive_entry* e, struct rar5* rar, int64_t extra_data_size)
1619 {
1620 uint64_t extra_field_size;
1621 uint64_t extra_field_id = 0;
1622 int ret = ARCHIVE_FATAL;
1623 uint64_t var_size;
1624
1625 while(extra_data_size > 0) {
1626 if(!read_var(a, &extra_field_size, &var_size))
1627 return ARCHIVE_EOF;
1628
1629 extra_data_size -= var_size;
1630 if(ARCHIVE_OK != consume(a, var_size)) {
1631 return ARCHIVE_EOF;
1632 }
1633
1634 if(!read_var(a, &extra_field_id, &var_size))
1635 return ARCHIVE_EOF;
1636
1637 extra_field_size -= var_size;
1638 extra_data_size -= var_size;
1639 if(ARCHIVE_OK != consume(a, var_size)) {
1640 return ARCHIVE_EOF;
1641 }
1642
1643 switch(extra_field_id) {
1644 case EX_HASH:
1645 ret = parse_file_extra_hash(a, rar,
1646 &extra_data_size);
1647 break;
1648 case EX_HTIME:
1649 ret = parse_file_extra_htime(a, e, rar,
1650 &extra_data_size);
1651 break;
1652 case EX_REDIR:
1653 ret = parse_file_extra_redir(a, e, rar,
1654 &extra_data_size);
1655 break;
1656 case EX_UOWNER:
1657 ret = parse_file_extra_owner(a, e,
1658 &extra_data_size);
1659 break;
1660 case EX_VERSION:
1661 ret = parse_file_extra_version(a, e,
1662 &extra_data_size);
1663 break;
1664 case EX_CRYPT:
1665 /* Mark the entry as encrypted */
1666 archive_entry_set_is_data_encrypted(e, 1);
1667 rar->has_encrypted_entries = 1;
1668 rar->cstate.data_encrypted = 1;
1669 /* fallthrough */
1670 case EX_SUBDATA:
1671 /* fallthrough */
1672 default:
1673 /* Skip unsupported entry. */
1674 extra_data_size -= extra_field_size;
1675 if (ARCHIVE_OK != consume(a, extra_field_size)) {
1676 return ARCHIVE_EOF;
1677 }
1678 }
1679 }
1680
1681 if(ret != ARCHIVE_OK) {
1682 /* Attribute not implemented. */
1683 return ret;
1684 }
1685
1686 return ARCHIVE_OK;
1687 }
1688
process_head_file(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1689 static int process_head_file(struct archive_read* a, struct rar5* rar,
1690 struct archive_entry* entry, size_t block_flags)
1691 {
1692 int64_t extra_data_size = 0;
1693 size_t data_size = 0;
1694 size_t file_flags = 0;
1695 size_t file_attr = 0;
1696 size_t compression_info = 0;
1697 size_t host_os = 0;
1698 size_t name_size = 0;
1699 uint64_t unpacked_size, window_size;
1700 uint32_t mtime = 0, crc = 0;
1701 int c_method = 0, c_version = 0;
1702 char name_utf8_buf[MAX_NAME_IN_BYTES];
1703 const uint8_t* p;
1704
1705 enum FILE_FLAGS {
1706 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004,
1707 UNKNOWN_UNPACKED_SIZE = 0x0008,
1708 };
1709
1710 enum FILE_ATTRS {
1711 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4,
1712 ATTR_DIRECTORY = 0x10,
1713 };
1714
1715 enum COMP_INFO_FLAGS {
1716 SOLID = 0x0040,
1717 };
1718
1719 enum HOST_OS {
1720 HOST_WINDOWS = 0,
1721 HOST_UNIX = 1,
1722 };
1723
1724 archive_entry_clear(entry);
1725
1726 /* Do not reset file context if we're switching archives. */
1727 if(!rar->cstate.switch_multivolume) {
1728 reset_file_context(rar);
1729 }
1730
1731 if(block_flags & HFL_EXTRA_DATA) {
1732 uint64_t edata_size = 0;
1733 if(!read_var(a, &edata_size, NULL))
1734 return ARCHIVE_EOF;
1735
1736 /* Intentional type cast from unsigned to signed. */
1737 extra_data_size = (int64_t) edata_size;
1738 }
1739
1740 if(block_flags & HFL_DATA) {
1741 if(!read_var_sized(a, &data_size, NULL))
1742 return ARCHIVE_EOF;
1743
1744 rar->file.bytes_remaining = data_size;
1745 } else {
1746 rar->file.bytes_remaining = 0;
1747
1748 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1749 "no data found in file/service block");
1750 return ARCHIVE_FATAL;
1751 }
1752
1753 if(!read_var_sized(a, &file_flags, NULL))
1754 return ARCHIVE_EOF;
1755
1756 if(!read_var(a, &unpacked_size, NULL))
1757 return ARCHIVE_EOF;
1758
1759 if(file_flags & UNKNOWN_UNPACKED_SIZE) {
1760 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1761 "Files with unknown unpacked size are not supported");
1762 return ARCHIVE_FATAL;
1763 }
1764
1765 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);
1766
1767 if(!read_var_sized(a, &file_attr, NULL))
1768 return ARCHIVE_EOF;
1769
1770 if(file_flags & UTIME) {
1771 if(!read_u32(a, &mtime))
1772 return ARCHIVE_EOF;
1773 }
1774
1775 if(file_flags & CRC32) {
1776 if(!read_u32(a, &crc))
1777 return ARCHIVE_EOF;
1778 }
1779
1780 if(!read_var_sized(a, &compression_info, NULL))
1781 return ARCHIVE_EOF;
1782
1783 c_method = (int) (compression_info >> 7) & 0x7;
1784 c_version = (int) (compression_info & 0x3f);
1785
1786 /* RAR5 seems to limit the dictionary size to 64MB. */
1787 window_size = (rar->file.dir > 0) ?
1788 0 :
1789 g_unpack_window_size << ((compression_info >> 10) & 15);
1790 rar->cstate.method = c_method;
1791 rar->cstate.version = c_version + 50;
1792 rar->file.solid = (compression_info & SOLID) > 0;
1793
1794 /* Archives which declare solid files without initializing the window
1795 * buffer first are invalid, unless previous data was encrypted, in
1796 * which case we may never have had the chance */
1797
1798 if(rar->file.solid > 0 && rar->cstate.data_encrypted == 0 &&
1799 rar->cstate.window_buf == NULL) {
1800 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1801 "Declared solid file, but no window buffer "
1802 "initialized yet.");
1803 return ARCHIVE_FATAL;
1804 }
1805
1806 /* Check if window_size is a sane value. Also, if the file is not
1807 * declared as a directory, disallow window_size == 0. */
1808 if(window_size > (64 * 1024 * 1024) ||
1809 (rar->file.dir == 0 && window_size == 0))
1810 {
1811 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1812 "Declared dictionary size is not supported.");
1813 return ARCHIVE_FATAL;
1814 }
1815
1816 if(rar->file.solid > 0) {
1817 /* Re-check if current window size is the same as previous
1818 * window size (for solid files only). */
1819 if(rar->file.solid_window_size > 0 &&
1820 rar->file.solid_window_size != (ssize_t) window_size)
1821 {
1822 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1823 "Window size for this solid file doesn't match "
1824 "the window size used in previous solid file. ");
1825 return ARCHIVE_FATAL;
1826 }
1827 }
1828 else
1829 rar->cstate.data_encrypted = 0; /* Reset for new buffer */
1830
1831 if(rar->cstate.window_size < (ssize_t) window_size &&
1832 rar->cstate.window_buf)
1833 {
1834 /* The `data_ready` stack contains pointers to the `window_buf` or
1835 * `filtered_buf` buffers. Since we're about to reallocate the first
1836 * buffer, some of those pointers could become invalid. Therefore, we
1837 * need to dispose of all entries from the stack before attempting the
1838 * realloc. */
1839 clear_data_ready_stack(rar);
1840
1841 /* If window_buf has been allocated before, reallocate it, so
1842 * that its size will match new window_size. */
1843
1844 uint8_t* new_window_buf =
1845 realloc(rar->cstate.window_buf, (size_t) window_size);
1846
1847 if(!new_window_buf) {
1848 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1849 "Not enough memory when trying to realloc the window "
1850 "buffer.");
1851 return ARCHIVE_FATAL;
1852 }
1853
1854 rar->cstate.window_buf = new_window_buf;
1855 }
1856
1857 /* Values up to 64M should fit into ssize_t on every
1858 * architecture. */
1859 rar->cstate.window_size = (ssize_t) window_size;
1860
1861 if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
1862 /* Solid files have to have the same window_size across
1863 whole archive. Remember the window_size parameter
1864 for first solid file found. */
1865 rar->file.solid_window_size = rar->cstate.window_size;
1866 }
1867
1868 init_window_mask(rar);
1869
1870 rar->file.service = 0;
1871
1872 if(!read_var_sized(a, &host_os, NULL))
1873 return ARCHIVE_EOF;
1874
1875 if(host_os == HOST_WINDOWS) {
1876 /* Host OS is Windows */
1877
1878 __LA_MODE_T mode;
1879
1880 if(file_attr & ATTR_DIRECTORY) {
1881 if (file_attr & ATTR_READONLY) {
1882 mode = 0555 | AE_IFDIR;
1883 } else {
1884 mode = 0755 | AE_IFDIR;
1885 }
1886 } else {
1887 if (file_attr & ATTR_READONLY) {
1888 mode = 0444 | AE_IFREG;
1889 } else {
1890 mode = 0644 | AE_IFREG;
1891 }
1892 }
1893
1894 archive_entry_set_mode(entry, mode);
1895
1896 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
1897 char *fflags_text, *ptr;
1898 /* allocate for ",rdonly,hidden,system" */
1899 fflags_text = malloc(22 * sizeof(*fflags_text));
1900 if (fflags_text != NULL) {
1901 ptr = fflags_text;
1902 if (file_attr & ATTR_READONLY) {
1903 strcpy(ptr, ",rdonly");
1904 ptr = ptr + 7;
1905 }
1906 if (file_attr & ATTR_HIDDEN) {
1907 strcpy(ptr, ",hidden");
1908 ptr = ptr + 7;
1909 }
1910 if (file_attr & ATTR_SYSTEM) {
1911 strcpy(ptr, ",system");
1912 ptr = ptr + 7;
1913 }
1914 if (ptr > fflags_text) {
1915 archive_entry_copy_fflags_text(entry,
1916 fflags_text + 1);
1917 }
1918 free(fflags_text);
1919 }
1920 }
1921 } else if(host_os == HOST_UNIX) {
1922 /* Host OS is Unix */
1923 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
1924 } else {
1925 /* Unknown host OS */
1926 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1927 "Unsupported Host OS: 0x%jx",
1928 (uintmax_t)host_os);
1929
1930 return ARCHIVE_FATAL;
1931 }
1932
1933 if(!read_var_sized(a, &name_size, NULL))
1934 return ARCHIVE_EOF;
1935
1936 if(name_size > (MAX_NAME_IN_CHARS - 1)) {
1937 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1938 "Filename is too long");
1939
1940 return ARCHIVE_FATAL;
1941 }
1942
1943 if(name_size == 0) {
1944 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1945 "No filename specified");
1946
1947 return ARCHIVE_FATAL;
1948 }
1949
1950 if(!read_ahead(a, name_size, &p))
1951 return ARCHIVE_EOF;
1952
1953 memcpy(name_utf8_buf, p, name_size);
1954 name_utf8_buf[name_size] = 0;
1955 if(ARCHIVE_OK != consume(a, name_size)) {
1956 return ARCHIVE_EOF;
1957 }
1958
1959 archive_entry_update_pathname_utf8(entry, name_utf8_buf);
1960
1961 if(extra_data_size > 0) {
1962 int ret = process_head_file_extra(a, entry, rar,
1963 extra_data_size);
1964
1965 /*
1966 * TODO: rewrite or remove useless sanity check
1967 * as extra_data_size is not passed as a pointer
1968 *
1969 if(extra_data_size < 0) {
1970 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1971 "File extra data size is not zero");
1972 return ARCHIVE_FATAL;
1973 }
1974 */
1975
1976 if(ret != ARCHIVE_OK)
1977 return ret;
1978 }
1979
1980 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
1981 rar->file.unpacked_size = (ssize_t) unpacked_size;
1982 if(rar->file.redir_type == REDIR_TYPE_NONE)
1983 archive_entry_set_size(entry, unpacked_size);
1984 }
1985
1986 if(file_flags & UTIME) {
1987 archive_entry_set_mtime(entry, (time_t) mtime, 0);
1988 }
1989
1990 if(file_flags & CRC32) {
1991 rar->file.stored_crc32 = crc;
1992 }
1993
1994 if(!rar->cstate.switch_multivolume) {
1995 /* Do not reinitialize unpacking state if we're switching
1996 * archives. */
1997 rar->cstate.block_parsing_finished = 1;
1998 rar->cstate.all_filters_applied = 1;
1999 rar->cstate.initialized = 0;
2000 }
2001
2002 if(rar->generic.split_before > 0) {
2003 /* If now we're standing on a header that has a 'split before'
2004 * mark, it means we're standing on a 'continuation' file
2005 * header. Signal the caller that if it wants to move to
2006 * another file, it must call rar5_read_header() function
2007 * again. */
2008
2009 return ARCHIVE_RETRY;
2010 } else {
2011 return ARCHIVE_OK;
2012 }
2013 }
2014
process_head_service(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)2015 static int process_head_service(struct archive_read* a, struct rar5* rar,
2016 struct archive_entry* entry, size_t block_flags)
2017 {
2018 /* Process this SERVICE block the same way as FILE blocks. */
2019 int ret = process_head_file(a, rar, entry, block_flags);
2020 if(ret != ARCHIVE_OK)
2021 return ret;
2022
2023 rar->file.service = 1;
2024
2025 /* But skip the data part automatically. It's no use for the user
2026 * anyway. It contains only service data, not even needed to
2027 * properly unpack the file. */
2028 ret = rar5_read_data_skip(a);
2029 if(ret != ARCHIVE_OK)
2030 return ret;
2031
2032 /* After skipping, try parsing another block automatically. */
2033 return ARCHIVE_RETRY;
2034 }
2035
process_head_main(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)2036 static int process_head_main(struct archive_read* a, struct rar5* rar,
2037 struct archive_entry* entry, size_t block_flags)
2038 {
2039 int ret;
2040 uint64_t extra_data_size = 0;
2041 size_t extra_field_size = 0;
2042 size_t extra_field_id = 0;
2043 size_t archive_flags = 0;
2044
2045 enum MAIN_FLAGS {
2046 VOLUME = 0x0001, /* multi-volume archive */
2047 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't
2048 * have it */
2049 SOLID = 0x0004, /* solid archive */
2050 PROTECT = 0x0008, /* contains Recovery info */
2051 LOCK = 0x0010, /* readonly flag, not used */
2052 };
2053
2054 enum MAIN_EXTRA {
2055 // Just one attribute here.
2056 LOCATOR = 0x01,
2057 };
2058
2059 (void) entry;
2060
2061 if(block_flags & HFL_EXTRA_DATA) {
2062 if(!read_var(a, &extra_data_size, NULL))
2063 return ARCHIVE_EOF;
2064 } else {
2065 extra_data_size = 0;
2066 }
2067
2068 if(!read_var_sized(a, &archive_flags, NULL)) {
2069 return ARCHIVE_EOF;
2070 }
2071
2072 rar->main.volume = (archive_flags & VOLUME) > 0;
2073 rar->main.solid = (archive_flags & SOLID) > 0;
2074
2075 if(archive_flags & VOLUME_NUMBER) {
2076 size_t v = 0;
2077 if(!read_var_sized(a, &v, NULL)) {
2078 return ARCHIVE_EOF;
2079 }
2080
2081 if (v > UINT_MAX) {
2082 archive_set_error(&a->archive,
2083 ARCHIVE_ERRNO_FILE_FORMAT,
2084 "Invalid volume number");
2085 return ARCHIVE_FATAL;
2086 }
2087
2088 rar->main.vol_no = (unsigned int) v;
2089 } else {
2090 rar->main.vol_no = 0;
2091 }
2092
2093 if(rar->vol.expected_vol_no > 0 &&
2094 rar->main.vol_no != rar->vol.expected_vol_no)
2095 {
2096 /* Returning EOF instead of FATAL because of strange
2097 * libarchive behavior. When opening multiple files via
2098 * archive_read_open_filenames(), after reading up the whole
2099 * last file, the __archive_read_ahead function wraps up to
2100 * the first archive instead of returning EOF. */
2101 return ARCHIVE_EOF;
2102 }
2103
2104 if(extra_data_size == 0) {
2105 /* Early return. */
2106 return ARCHIVE_OK;
2107 }
2108
2109 if(!read_var_sized(a, &extra_field_size, NULL)) {
2110 return ARCHIVE_EOF;
2111 }
2112
2113 if(!read_var_sized(a, &extra_field_id, NULL)) {
2114 return ARCHIVE_EOF;
2115 }
2116
2117 if(extra_field_size == 0) {
2118 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2119 "Invalid extra field size");
2120 return ARCHIVE_FATAL;
2121 }
2122
2123 switch(extra_field_id) {
2124 case LOCATOR:
2125 ret = process_main_locator_extra_block(a, rar);
2126 if(ret != ARCHIVE_OK) {
2127 /* Error while parsing main locator extra
2128 * block. */
2129 return ret;
2130 }
2131
2132 break;
2133 default:
2134 archive_set_error(&a->archive,
2135 ARCHIVE_ERRNO_FILE_FORMAT,
2136 "Unsupported extra type (0x%jx)",
2137 (uintmax_t)extra_field_id);
2138 return ARCHIVE_FATAL;
2139 }
2140
2141 return ARCHIVE_OK;
2142 }
2143
skip_unprocessed_bytes(struct archive_read * a)2144 static int skip_unprocessed_bytes(struct archive_read* a) {
2145 struct rar5* rar = get_context(a);
2146 int ret;
2147
2148 if(rar->file.bytes_remaining) {
2149 /* Use different skipping method in block merging mode than in
2150 * normal mode. If merge mode is active, rar5_read_data_skip
2151 * can't be used, because it could allow recursive use of
2152 * merge_block() * function, and this function doesn't support
2153 * recursive use. */
2154 if(rar->merge_mode) {
2155 /* Discard whole merged block. This is valid in solid
2156 * mode as well, because the code will discard blocks
2157 * only if those blocks are safe to discard (i.e.
2158 * they're not FILE blocks). */
2159 ret = consume(a, rar->file.bytes_remaining);
2160 if(ret != ARCHIVE_OK) {
2161 return ret;
2162 }
2163 rar->file.bytes_remaining = 0;
2164 } else {
2165 /* If we're not in merge mode, use safe skipping code.
2166 * This will ensure we'll handle solid archives
2167 * properly. */
2168 ret = rar5_read_data_skip(a);
2169 if(ret != ARCHIVE_OK) {
2170 return ret;
2171 }
2172 }
2173 }
2174
2175 return ARCHIVE_OK;
2176 }
2177
2178 static int scan_for_signature(struct archive_read* a);
2179
2180 /* Base block processing function. A 'base block' is a RARv5 header block
2181 * that tells the reader what kind of data is stored inside the block.
2182 *
 * From the bird's-eye view a RAR file looks like this:
2184 *
2185 * <magic><base_block_1><base_block_2>...<base_block_n>
2186 *
2187 * There are a few types of base blocks. Those types are specified inside
2188 * the 'switch' statement in this function. For example purposes, I'll write
2189 * how a standard RARv5 file could look like here:
2190 *
2191 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
2192 *
2193 * The structure above could describe an archive file with 3 files in it,
2194 * one service "QuickOpen" block (that is ignored by this parser), and an
2195 * end of file base block marker.
2196 *
2197 * If the file is stored in multiple archive files ("multiarchive"), it might
2198 * look like this:
2199 *
2200 * .part01.rar: <magic><MAIN><FILE><ENDARC>
2201 * .part02.rar: <magic><MAIN><FILE><ENDARC>
2202 * .part03.rar: <magic><MAIN><FILE><ENDARC>
2203 *
2204 * This example could describe 3 RAR files that contain ONE archived file.
2205 * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends what metadata is stored in
2207 * the headers of <FILE> blocks.
2208 *
2209 * Each <FILE> block contains info about its size, the name of the file it's
2210 * storing inside, and whether this FILE block is a continuation block of
 * previous archive ('split before'), and whether this FILE block should be
2212 * continued in another archive ('split after'). By parsing the 'split before'
2213 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
2214 * are describing one file, or multiple files (with the same filename, for
2215 * example).
2216 *
2217 * One thing to note is that if we're parsing the first <FILE> block, and
2218 * we see 'split after' flag, then we need to jump over to another <FILE>
2219 * block to be able to decompress rest of the data. To do this, we need
2220 * to skip the <ENDARC> block, then switch to another file, then skip the
2221 * <magic> block, <MAIN> block, and then we're standing on the proper
2222 * <FILE> block.
2223 */
2224
/* Reads one base block header from the stream, validates it and dispatches
 * on the block type.
 *
 * Return values produced here:
 *  - a non-OK status of skip_unprocessed_bytes(), passed through;
 *  - ARCHIVE_EOF when a header field can't be read, when a MARK block is
 *    seen, when an ENDARC block closes a non-volume archive, or when
 *    scan_for_signature() reports ARCHIVE_FATAL after an ENDARC block;
 *  - ARCHIVE_FATAL for an invalid header size, a header CRC mismatch
 *    (unless DONT_FAIL_ON_CRC_ERROR is defined), a CRYPT block, or an
 *    unknown block type that isn't flagged HFL_SKIP_IF_UNKNOWN;
 *  - ARCHIVE_RETRY after a successfully parsed MAIN block, or for an
 *    unknown block flagged HFL_SKIP_IF_UNKNOWN;
 *  - ARCHIVE_OK after an ENDARC block of a multi-volume archive;
 *  - for MAIN (on failure), FILE and SERVICE blocks, the status of the
 *    respective process_head_*() function.
 */
static int process_base_block(struct archive_read* a,
    struct archive_entry* entry)
{
	const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;

	struct rar5* rar = get_context(a);
	uint32_t hdr_crc, computed_crc;
	size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
	size_t header_id = 0;
	size_t header_flags = 0;
	const uint8_t* p;
	int ret;

	enum HEADER_TYPE {
		HEAD_MARK    = 0x00, HEAD_MAIN  = 0x01, HEAD_FILE   = 0x02,
		HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
		HEAD_UNKNOWN = 0xff,
	};

	/* Skip any unprocessed data for this file. */
	ret = skip_unprocessed_bytes(a);
	if(ret != ARCHIVE_OK)
		return ret;

	/* Read the expected CRC32 checksum. */
	if(!read_u32(a, &hdr_crc)) {
		return ARCHIVE_EOF;
	}

	/* Read header size. `hdr_size_len` receives the number of bytes the
	 * size field itself occupies. */
	if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
		return ARCHIVE_EOF;
	}

	/* The CRC covers the size field as well as the header it describes,
	 * so both lengths are added up. */
	hdr_size = raw_hdr_size + hdr_size_len;

	/* Sanity check, maximum header size for RAR5 is 2MB. */
	if(hdr_size > (2 * 1024 * 1024)) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Base block header is too large");

		return ARCHIVE_FATAL;
	}

	/* Additional sanity checks to weed out invalid files. */
	if(raw_hdr_size == 0 || hdr_size_len == 0 ||
	    hdr_size < SMALLEST_RAR5_BLOCK_SIZE)
	{
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Too small block encountered (%zu bytes)",
		    raw_hdr_size);

		return ARCHIVE_FATAL;
	}

	/* Read the whole header data into memory, maximum memory use here is
	 * 2MB. */
	if(!read_ahead(a, hdr_size, &p)) {
		return ARCHIVE_EOF;
	}

	/* Verify the CRC32 of the header data. */
	computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
	if(computed_crc != hdr_crc) {
#ifndef DONT_FAIL_ON_CRC_ERROR
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Header CRC error");

		return ARCHIVE_FATAL;
#endif
	}

	/* If the checksum is OK, we proceed with parsing. Only the size
	 * field is consumed here; the fields that follow are consumed by
	 * the readers below. */
	if(ARCHIVE_OK != consume(a, hdr_size_len)) {
		return ARCHIVE_EOF;
	}

	if(!read_var_sized(a, &header_id, NULL))
		return ARCHIVE_EOF;

	if(!read_var_sized(a, &header_flags, NULL))
		return ARCHIVE_EOF;

	/* Remember the generic properties of this block; they are stored
	 * before the type-specific parsing starts. */
	rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0;
	rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0;
	rar->generic.size = (int)hdr_size;
	rar->generic.last_header_id = (int)header_id;
	rar->main.endarc = 0;

	/* Those are possible header ids in RARv5. */
	switch(header_id) {
		case HEAD_MAIN:
			ret = process_head_main(a, rar, entry, header_flags);

			/* Main header doesn't have any files in it, so it's
			 * pointless to return to the caller. Retry to next
			 * header, which should be HEAD_FILE/HEAD_SERVICE. */
			if(ret == ARCHIVE_OK)
				return ARCHIVE_RETRY;

			return ret;
		case HEAD_SERVICE:
			ret = process_head_service(a, rar, entry, header_flags);
			return ret;
		case HEAD_FILE:
			ret = process_head_file(a, rar, entry, header_flags);
			return ret;
		case HEAD_CRYPT:
			/* Record that the archive is encrypted, then fail:
			 * encrypted archives can't be read. */
			archive_entry_set_is_metadata_encrypted(entry, 1);
			archive_entry_set_is_data_encrypted(entry, 1);
			rar->has_encrypted_entries = 1;
			rar->headers_are_encrypted = 1;
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Encryption is not supported");
			return ARCHIVE_FATAL;
		case HEAD_ENDARC:
			rar->main.endarc = 1;

			/* After encountering an end of file marker, we need
			 * to take into consideration if this archive is
			 * continued in another file (i.e. is it part01.rar:
			 * is there a part02.rar?) */
			if(rar->main.volume) {
				/* In case there is part02.rar, position the
				 * read pointer in a proper place, so we can
				 * resume parsing. */
				ret = scan_for_signature(a);
				if(ret == ARCHIVE_FATAL) {
					return ARCHIVE_EOF;
				} else {
					/* Refuse to continue once the
					 * expected volume number has reached
					 * UINT_MAX. */
					if(rar->vol.expected_vol_no ==
					    UINT_MAX) {
						archive_set_error(&a->archive,
						    ARCHIVE_ERRNO_FILE_FORMAT,
						    "Header error");
						return ARCHIVE_FATAL;
					}

					/* The next MAIN header is expected
					 * to carry the following volume
					 * number. */
					rar->vol.expected_vol_no =
					    rar->main.vol_no + 1;
					return ARCHIVE_OK;
				}
			} else {
				return ARCHIVE_EOF;
			}
		case HEAD_MARK:
			return ARCHIVE_EOF;
		default:
			if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) {
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_FILE_FORMAT,
				    "Header type error");
				return ARCHIVE_FATAL;
			} else {
				/* If the block is marked as 'skip if unknown',
				 * do as the flag says: skip the block
				 * instead on failing on it. */
				return ARCHIVE_RETRY;
			}
	}

#if !defined WIN32
	// Not reached.
	archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
	    "Internal unpacker error");
	return ARCHIVE_FATAL;
#endif
}
2394
skip_base_block(struct archive_read * a)2395 static int skip_base_block(struct archive_read* a) {
2396 int ret;
2397 struct rar5* rar = get_context(a);
2398
2399 /* Create a new local archive_entry structure that will be operated on
2400 * by header reader; operations on this archive_entry will be discarded.
2401 */
2402 struct archive_entry* entry = archive_entry_new();
2403 ret = process_base_block(a, entry);
2404
2405 /* Discard operations on this archive_entry structure. */
2406 archive_entry_free(entry);
2407 if(ret == ARCHIVE_FATAL)
2408 return ret;
2409
2410 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0)
2411 return ARCHIVE_OK;
2412
2413 if(ret == ARCHIVE_OK)
2414 return ARCHIVE_RETRY;
2415 else
2416 return ret;
2417 }
2418
/* If the stream starts with an executable image ("MZ" or the ELF magic),
 * scans forward for the RAR5 signature and consumes everything in front of
 * it. Judging by the function name, this handles self-extracting archives.
 *
 * Returns:
 *  - ARCHIVE_EOF when fewer than 7 bytes are available;
 *  - ARCHIVE_OK when the stream doesn't start with an executable magic,
 *    when the signature was found (the stream is then positioned on it),
 *    or when the scan limit was reached without finding the signature;
 *  - ARCHIVE_FATAL when no usable amount of data (at least 0x40 bytes) can
 *    be read ahead during the scan.
 */
static int try_skip_sfx(struct archive_read *a)
{
	const char *p;

	if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
		return ARCHIVE_EOF;

	/* "MZ" and "\x7F" "ELF" are the leading bytes checked for. */
	if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0)
	{
		char signature[sizeof(rar5_signature_xor)];
		const void *h;
		const char *q;
		size_t skip, total = 0;
		ssize_t bytes, window = 4096;

		/* Fill `signature` with the byte sequence searched for. */
		rar5_signature(signature);

		/* Scanning stops once 512 KiB have been examined. */
		while (total + window <= (1024 * 512)) {
			h = __archive_read_ahead(a, window, &bytes);
			if (h == NULL) {
				/* Remaining bytes are less than window. */
				window >>= 1;
				if (window < 0x40)
					goto fatal;
				continue;
			}
			if (bytes < 0x40)
				goto fatal;
			p = h;
			q = p + bytes;

			/*
			 * Scan ahead until we find something that looks
			 * like the RAR header. Candidate positions are
			 * checked in steps of 0x10 bytes.
			 */
			while (p + 8 < q) {
				if (memcmp(p, signature, sizeof(signature)) == 0) {
					/* Found: drop everything before the
					 * signature and stop. */
					skip = p - (const char *)h;
					__archive_read_consume(a, skip);
					return (ARCHIVE_OK);
				}
				p += 0x10;
			}
			/* Not found in this window: drop the scanned part
			 * and account for it against the scan limit. */
			skip = p - (const char *)h;
			__archive_read_consume(a, skip);
			total += skip;
		}
	}

	return ARCHIVE_OK;
fatal:
	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
	    "Couldn't find out RAR header");
	return (ARCHIVE_FATAL);
}
2474
rar5_read_header(struct archive_read * a,struct archive_entry * entry)2475 static int rar5_read_header(struct archive_read *a,
2476 struct archive_entry *entry)
2477 {
2478 struct rar5* rar = get_context(a);
2479 int ret;
2480
2481 /*
2482 * It should be sufficient to call archive_read_next_header() for
2483 * a reader to determine if an entry is encrypted or not.
2484 */
2485 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
2486 rar->has_encrypted_entries = 0;
2487 }
2488
2489 if(rar->header_initialized == 0) {
2490 init_header(a);
2491 if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN)
2492 return ret;
2493 rar->header_initialized = 1;
2494 }
2495
2496 if(rar->skipped_magic == 0) {
2497 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) {
2498 return ARCHIVE_EOF;
2499 }
2500
2501 rar->skipped_magic = 1;
2502 }
2503
2504 do {
2505 ret = process_base_block(a, entry);
2506 } while(ret == ARCHIVE_RETRY ||
2507 (rar->main.endarc > 0 && ret == ARCHIVE_OK));
2508
2509 return ret;
2510 }
2511
init_unpack(struct rar5 * rar)2512 static void init_unpack(struct rar5* rar) {
2513 rar->file.calculated_crc32 = 0;
2514 init_window_mask(rar);
2515
2516 free(rar->cstate.window_buf);
2517 free(rar->cstate.filtered_buf);
2518
2519 if(rar->cstate.window_size > 0) {
2520 rar->cstate.window_buf = calloc(1, rar->cstate.window_size);
2521 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size);
2522 } else {
2523 rar->cstate.window_buf = NULL;
2524 rar->cstate.filtered_buf = NULL;
2525 }
2526
2527 clear_data_ready_stack(rar);
2528
2529 rar->cstate.write_ptr = 0;
2530 rar->cstate.last_write_ptr = 0;
2531
2532 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd));
2533 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld));
2534 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd));
2535 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd));
2536 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd));
2537 }
2538
update_crc(struct rar5 * rar,const uint8_t * p,size_t to_read)2539 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) {
2540 int verify_crc;
2541
2542 if(rar->skip_mode) {
2543 #if defined CHECK_CRC_ON_SOLID_SKIP
2544 verify_crc = 1;
2545 #else
2546 verify_crc = 0;
2547 #endif
2548 } else
2549 verify_crc = 1;
2550
2551 if(verify_crc) {
2552 /* Don't update CRC32 if the file doesn't have the
2553 * `stored_crc32` info filled in. */
2554 if(rar->file.stored_crc32 > 0) {
2555 rar->file.calculated_crc32 =
2556 crc32(rar->file.calculated_crc32, p, (unsigned int)to_read);
2557 }
2558
2559 /* Check if the file uses an optional BLAKE2sp checksum
2560 * algorithm. */
2561 if(rar->file.has_blake2 > 0) {
2562 /* Return value of the `update` function is always 0,
2563 * so we can explicitly ignore it here. */
2564 (void) blake2sp_update(&rar->file.b2state, p, to_read);
2565 }
2566 }
2567 }
2568
/* Builds the decoding tables of `table` from `size` code lengths.
 *
 * bit_length: one code length per symbol; only the low 4 bits are used.
 * table:      receives the sorted symbol list (decode_num), the per-length
 *             limits and start positions (decode_len, decode_pos) and the
 *             quick lookup tables (quick_len, quick_num).
 * size:       number of symbols; also selects the quick lookup width
 *             (10 bits for HUFF_NC, 7 bits otherwise).
 *
 * Always returns ARCHIVE_OK. */
static int create_decode_tables(uint8_t* bit_length,
    struct decode_table* table, int size)
{
	int code, upper_limit = 0, i, lc[16];
	uint32_t decode_pos_clone[rar5_countof(table->decode_pos)];
	ssize_t cur_len, quick_data_size;

	memset(&lc, 0, sizeof(lc));
	memset(table->decode_num, 0, sizeof(table->decode_num));
	table->size = size;
	table->quick_bits = size == HUFF_NC ? 10 : 7;

	/* Count how many symbols use each code length. */
	for(i = 0; i < size; i++) {
		lc[bit_length[i] & 15]++;
	}

	/* Length 0 means "symbol not used"; don't count those. */
	lc[0] = 0;
	table->decode_pos[0] = 0;
	table->decode_len[0] = 0;

	/* For every code length compute the upper limit of its codes,
	 * left-aligned in a 16-bit field (decode_len), and the index in
	 * decode_num where symbols of that length start (decode_pos). */
	for(i = 1; i < 16; i++) {
		upper_limit += lc[i];

		table->decode_len[i] = upper_limit << (16 - i);
		table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1];

		upper_limit <<= 1;
	}

	/* Work on a copy of the start positions, because they are advanced
	 * while the symbols are being placed. */
	memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone));

	/* Place each used symbol into the slot range of its code length. */
	for(i = 0; i < size; i++) {
		uint8_t clen = bit_length[i] & 15;
		if(clen > 0) {
			int last_pos = decode_pos_clone[clen];
			table->decode_num[last_pos] = i;
			decode_pos_clone[clen]++;
		}
	}

	/* Precompute, for every possible prefix of quick_bits bits, the code
	 * length and the symbol it decodes to. */
	quick_data_size = (int64_t)1 << table->quick_bits;
	cur_len = 1;
	for(code = 0; code < quick_data_size; code++) {
		int bit_field = code << (16 - table->quick_bits);
		int dist, pos;

		/* Find the code length this prefix belongs to. cur_len is
		 * not reset between iterations; it only grows as `code`
		 * grows. */
		while(cur_len < rar5_countof(table->decode_len) &&
		    bit_field >= table->decode_len[cur_len]) {
			cur_len++;
		}

		table->quick_len[code] = (uint8_t) cur_len;

		/* Offset of this code among the codes of the same length. */
		dist = bit_field - table->decode_len[cur_len - 1];
		dist >>= (16 - cur_len);

		pos = table->decode_pos[cur_len & 15] + dist;
		if(cur_len < rar5_countof(table->decode_pos) && pos < size) {
			table->quick_num[code] = table->decode_num[pos];
		} else {
			/* Out-of-range position: fall back to symbol 0. */
			table->quick_num[code] = 0;
		}
	}

	return ARCHIVE_OK;
}
2635
decode_number(struct archive_read * a,struct decode_table * table,const uint8_t * p,uint16_t * num)2636 static int decode_number(struct archive_read* a, struct decode_table* table,
2637 const uint8_t* p, uint16_t* num)
2638 {
2639 int i, bits, dist, ret;
2640 uint16_t bitfield;
2641 uint32_t pos;
2642 struct rar5* rar = get_context(a);
2643
2644 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) {
2645 return ret;
2646 }
2647
2648 bitfield &= 0xfffe;
2649
2650 if(bitfield < table->decode_len[table->quick_bits]) {
2651 int code = bitfield >> (16 - table->quick_bits);
2652 skip_bits(rar, table->quick_len[code]);
2653 *num = table->quick_num[code];
2654 return ARCHIVE_OK;
2655 }
2656
2657 bits = 15;
2658
2659 for(i = table->quick_bits + 1; i < 15; i++) {
2660 if(bitfield < table->decode_len[i]) {
2661 bits = i;
2662 break;
2663 }
2664 }
2665
2666 skip_bits(rar, bits);
2667
2668 dist = bitfield - table->decode_len[bits - 1];
2669 dist >>= (16 - bits);
2670 pos = table->decode_pos[bits] + dist;
2671
2672 if(pos >= table->size)
2673 pos = 0;
2674
2675 *num = table->decode_num[pos];
2676 return ARCHIVE_OK;
2677 }
2678
/* Reads and parses Huffman tables from the beginning of the block.
 *
 * First the HUFF_BC code lengths of the bit-length table are unpacked from
 * nibbles at `p`; that table is then used to decode HUFF_TABLE_SIZE code
 * lengths, from which the literal, distance, lower-distance-bits and
 * repeating-distance tables are built. The bit reader position in
 * `rar->bits` is set to the first bit after the nibble data.
 *
 * Returns ARCHIVE_OK on success, ARCHIVE_FATAL on truncated or invalid
 * table data, or the status of read_bits_16() when it fails. */
static int parse_tables(struct archive_read* a, struct rar5* rar,
    const uint8_t* p)
{
	int ret, value, i, w, idx = 0;
	uint8_t bit_length[HUFF_BC],
	    table[HUFF_TABLE_SIZE],
	    nibble_mask = 0xF0,
	    nibble_shift = 4;

	enum { ESCAPE = 15 };

	/* The data for table generation is compressed using a simple RLE-like
	 * algorithm when storing zeroes, so we need to unpack it first.
	 * `i` is the current input byte, `w` the number of lengths written. */
	for(w = 0, i = 0; w < HUFF_BC;) {
		if(i >= rar->cstate.cur_block_size) {
			/* Truncated data, can't continue. */
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Truncated data in huffman tables");
			return ARCHIVE_FATAL;
		}

		/* Take the current nibble: high nibble first, then low. */
		value = (p[i] & nibble_mask) >> nibble_shift;

		/* Advance to the next byte after its low nibble was read. */
		if(nibble_mask == 0x0F)
			++i;

		/* Toggle between the high and the low nibble. */
		nibble_mask ^= 0xFF;
		nibble_shift ^= 4;

		/* Values smaller than 15 is data, so we write it directly.
		 * Value 15 is a flag telling us that we need to unpack more
		 * bytes. */
		if(value == ESCAPE) {
			/* Read the nibble that follows the escape. */
			value = (p[i] & nibble_mask) >> nibble_shift;
			if(nibble_mask == 0x0F)
				++i;
			nibble_mask ^= 0xFF;
			nibble_shift ^= 4;

			if(value == 0) {
				/* We sometimes need to write the actual value
				 * of 15, so this case handles that. */
				bit_length[w++] = ESCAPE;
			} else {
				int k;

				/* Fill zeroes: a run of `value + 2` entries,
				 * clipped to the table size. */
				for(k = 0; (k < value + 2) && (w < HUFF_BC);
				    k++) {
					bit_length[w++] = 0;
				}
			}
		} else {
			bit_length[w++] = value;
		}
	}

	/* Continue reading with the bit reader right after the nibbles. */
	rar->bits.in_addr = i;
	rar->bits.bit_addr = nibble_shift ^ 4;

	ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Decoding huffman tables failed");
		return ARCHIVE_FATAL;
	}

	/* Decode the code lengths of all remaining tables into `table`,
	 * using the bit-length table built above. */
	for(i = 0; i < HUFF_TABLE_SIZE;) {
		uint16_t num;

		ret = decode_number(a, &rar->cstate.bd, p, &num);
		if(ret != ARCHIVE_OK) {
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Decoding huffman tables failed");
			return ARCHIVE_FATAL;
		}

		if(num < 16) {
			/* 0..15: store directly */
			table[i] = (uint8_t) num;
			i++;
		} else if(num < 18) {
			/* 16..17: repeat previous code */
			uint16_t n;

			if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
				return ret;

			/* The repeat count is 3 bits + 3 for code 16 and
			 * 7 bits + 11 for code 17. */
			if(num == 16) {
				n >>= 13;
				n += 3;
				skip_bits(rar, 3);
			} else {
				n >>= 9;
				n += 11;
				skip_bits(rar, 7);
			}

			if(i > 0) {
				while(n-- > 0 && i < HUFF_TABLE_SIZE) {
					table[i] = table[i - 1];
					i++;
				}
			} else {
				/* There is no previous code to repeat. */
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_FILE_FORMAT,
				    "Unexpected error when decoding "
				    "huffman tables");
				return ARCHIVE_FATAL;
			}
		} else {
			/* other codes: fill with zeroes `n` times */
			uint16_t n;

			if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
				return ret;

			/* The run length is 3 bits + 3 for code 18 and
			 * 7 bits + 11 for any other code. */
			if(num == 18) {
				n >>= 13;
				n += 3;
				skip_bits(rar, 3);
			} else {
				n >>= 9;
				n += 11;
				skip_bits(rar, 7);
			}

			while(n-- > 0 && i < HUFF_TABLE_SIZE)
				table[i++] = 0;
		}
	}

	/* `table` holds the code lengths of four tables back to back;
	 * `idx` walks over them. */
	ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create literal table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_NC;

	ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create distance table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_DC;

	ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create lower bits of distances table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_LDC;

	ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create repeating distances table");
		return ARCHIVE_FATAL;
	}

	return ARCHIVE_OK;
}
2850
2851 /* Parses the block header, verifies its CRC byte, and saves the header
2852 * fields inside the `hdr` pointer. */
parse_block_header(struct archive_read * a,const uint8_t * p,ssize_t * block_size,struct compressed_block_header * hdr)2853 static int parse_block_header(struct archive_read* a, const uint8_t* p,
2854 ssize_t* block_size, struct compressed_block_header* hdr)
2855 {
2856 uint8_t calculated_cksum;
2857 memcpy(hdr, p, sizeof(struct compressed_block_header));
2858
2859 if(bf_byte_count(hdr) > 2) {
2860 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2861 "Unsupported block header size (was %d, max is 2)",
2862 bf_byte_count(hdr));
2863 return ARCHIVE_FATAL;
2864 }
2865
2866 /* This should probably use bit reader interface in order to be more
2867 * future-proof. */
2868 *block_size = 0;
2869 switch(bf_byte_count(hdr)) {
2870 /* 1-byte block size */
2871 case 0:
2872 *block_size = *(const uint8_t*) &p[2];
2873 break;
2874
2875 /* 2-byte block size */
2876 case 1:
2877 *block_size = archive_le16dec(&p[2]);
2878 break;
2879
2880 /* 3-byte block size */
2881 case 2:
2882 *block_size = archive_le32dec(&p[2]);
2883 *block_size &= 0x00FFFFFF;
2884 break;
2885
2886 /* Other block sizes are not supported. This case is not
2887 * reached, because we have an 'if' guard before the switch
2888 * that makes sure of it. */
2889 default:
2890 return ARCHIVE_FATAL;
2891 }
2892
2893 /* Verify the block header checksum. 0x5A is a magic value and is
2894 * always * constant. */
2895 calculated_cksum = 0x5A
2896 ^ (uint8_t) hdr->block_flags_u8
2897 ^ (uint8_t) *block_size
2898 ^ (uint8_t) (*block_size >> 8)
2899 ^ (uint8_t) (*block_size >> 16);
2900
2901 if(calculated_cksum != hdr->block_cksum) {
2902 #ifndef DONT_FAIL_ON_CRC_ERROR
2903 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2904 "Block checksum error: got 0x%x, expected 0x%x",
2905 hdr->block_cksum, calculated_cksum);
2906
2907 return ARCHIVE_FATAL;
2908 #endif
2909 }
2910
2911 return ARCHIVE_OK;
2912 }
2913
2914 /* Convenience function used during filter processing. */
parse_filter_data(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * filter_data)2915 static int parse_filter_data(struct archive_read* a, struct rar5* rar,
2916 const uint8_t* p, uint32_t* filter_data)
2917 {
2918 int i, bytes, ret;
2919 uint32_t data = 0;
2920
2921 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes)))
2922 return ret;
2923
2924 bytes++;
2925
2926 for(i = 0; i < bytes; i++) {
2927 uint16_t byte;
2928
2929 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) {
2930 return ret;
2931 }
2932
2933 /* Cast to uint32_t will ensure the shift operation will not
2934 * produce undefined result. */
2935 data += ((uint32_t) byte >> 8) << (i * 8);
2936 skip_bits(rar, 8);
2937 }
2938
2939 *filter_data = data;
2940 return ARCHIVE_OK;
2941 }
2942
2943 /* Function is used during sanity checking. */
is_valid_filter_block_start(struct rar5 * rar,uint32_t start)2944 static int is_valid_filter_block_start(struct rar5* rar,
2945 uint32_t start)
2946 {
2947 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr;
2948 const int64_t last_bs = rar->cstate.last_block_start;
2949 const ssize_t last_bl = rar->cstate.last_block_length;
2950
2951 if(last_bs == 0 || last_bl == 0) {
2952 /* We didn't have any filters yet, so accept this offset. */
2953 return 1;
2954 }
2955
2956 if(block_start >= last_bs + last_bl) {
2957 /* Current offset is bigger than last block's end offset, so
2958 * accept current offset. */
2959 return 1;
2960 }
2961
2962 /* Any other case is not a normal situation and we should fail. */
2963 return 0;
2964 }
2965
2966 /* The function will create a new filter, read its parameters from the input
2967 * stream and add it to the filter collection. */
parse_filter(struct archive_read * ar,const uint8_t * p)2968 static int parse_filter(struct archive_read* ar, const uint8_t* p) {
2969 uint32_t block_start, block_length;
2970 uint16_t filter_type;
2971 struct filter_info* filt = NULL;
2972 struct rar5* rar = get_context(ar);
2973 int ret;
2974
2975 /* Read the parameters from the input stream. */
2976 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start)))
2977 return ret;
2978
2979 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length)))
2980 return ret;
2981
2982 if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type)))
2983 return ret;
2984
2985 filter_type >>= 13;
2986 skip_bits(rar, 3);
2987
2988 /* Perform some sanity checks on this filter parameters. Note that we
2989 * allow only DELTA, E8/E9 and ARM filters here, because rest of
2990 * filters are not used in RARv5. */
2991
2992 if(block_length < 4 ||
2993 block_length > 0x400000 ||
2994 filter_type > FILTER_ARM ||
2995 !is_valid_filter_block_start(rar, block_start))
2996 {
2997 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2998 "Invalid filter encountered");
2999 return ARCHIVE_FATAL;
3000 }
3001
3002 /* Allocate a new filter. */
3003 filt = add_new_filter(rar);
3004 if(filt == NULL) {
3005 archive_set_error(&ar->archive, ENOMEM,
3006 "Can't allocate memory for a filter descriptor.");
3007 return ARCHIVE_FATAL;
3008 }
3009
3010 filt->type = filter_type;
3011 filt->block_start = rar->cstate.write_ptr + block_start;
3012 filt->block_length = block_length;
3013
3014 rar->cstate.last_block_start = filt->block_start;
3015 rar->cstate.last_block_length = filt->block_length;
3016
3017 /* Read some more data in case this is a DELTA filter. Other filter
3018 * types don't require any additional data over what was already
3019 * read. */
3020 if(filter_type == FILTER_DELTA) {
3021 int channels;
3022
3023 if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels)))
3024 return ret;
3025
3026 filt->channels = channels + 1;
3027 }
3028
3029 return ARCHIVE_OK;
3030 }
3031
decode_code_length(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t code)3032 static int decode_code_length(struct archive_read* a, struct rar5* rar,
3033 const uint8_t* p, uint16_t code)
3034 {
3035 int lbits, length = 2;
3036
3037 if(code < 8) {
3038 lbits = 0;
3039 length += code;
3040 } else {
3041 lbits = code / 4 - 1;
3042 length += (4 | (code & 3)) << lbits;
3043 }
3044
3045 if(lbits > 0) {
3046 int add;
3047
3048 if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add))
3049 return -1;
3050
3051 length += add;
3052 }
3053
3054 return length;
3055 }
3056
copy_string(struct archive_read * a,int len,int dist)3057 static int copy_string(struct archive_read* a, int len, int dist) {
3058 struct rar5* rar = get_context(a);
3059 const ssize_t cmask = rar->cstate.window_mask;
3060 const uint64_t write_ptr = rar->cstate.write_ptr +
3061 rar->cstate.solid_offset;
3062 int i;
3063
3064 if (rar->cstate.window_buf == NULL)
3065 return ARCHIVE_FATAL;
3066
3067 /* The unpacker spends most of the time in this function. It would be
3068 * a good idea to introduce some optimizations here.
3069 *
3070 * Just remember that this loop treats buffers that overlap differently
3071 * than buffers that do not overlap. This is why a simple memcpy(3)
3072 * call will not be enough. */
3073
3074 for(i = 0; i < len; i++) {
3075 const ssize_t write_idx = (write_ptr + i) & cmask;
3076 const ssize_t read_idx = (write_ptr + i - dist) & cmask;
3077 rar->cstate.window_buf[write_idx] =
3078 rar->cstate.window_buf[read_idx];
3079 }
3080
3081 rar->cstate.write_ptr += len;
3082 return ARCHIVE_OK;
3083 }
3084
/* Decodes symbols from the compressed block buffer `p` and writes the
 * resulting bytes into the circular window buffer (`rar->cstate.window_buf`).
 *
 * The loop stops in two situations, both of which return ARCHIVE_OK:
 *
 *  - more than half of the window size has been generated since
 *    `last_write_ptr`; the bit reader position is kept in `rar->bits`, so
 *    a later call continues from the same place,
 *  - the bit reader position has reached the end of the current block; in
 *    this case `rar->cstate.block_parsing_finished` is set to 1.
 *
 * Other return values:
 *
 *  - ARCHIVE_EOF when the next symbol can't be decoded,
 *  - ARCHIVE_FATAL when a length/distance can't be decoded, when the
 *    distance would overflow, or when copy_string() fails,
 *  - whatever read_bits_32(), read_consume_bits() or parse_filter()
 *    returned, if they didn't return ARCHIVE_OK. */
static int do_uncompress_block(struct archive_read* a, const uint8_t* p) {
	struct rar5* rar = get_context(a);
	uint16_t num;
	int ret;

	/* Mask used to wrap write indices inside the window buffer. */
	const uint64_t cmask = rar->cstate.window_mask;
	const struct compressed_block_header* hdr = &rar->last_block_hdr;
	/* Bit position inside the last byte of the block at which the
	 * block is considered fully processed (see the check below). */
	const uint8_t bit_size = 1 + bf_bit_size(hdr);

	while(1) {
		if(rar->cstate.write_ptr - rar->cstate.last_write_ptr >
		    (rar->cstate.window_size >> 1)) {
			/* Don't allow growing data by more than half of the
			 * window size at a time. In such case, break the loop;
			 * next call to this function will continue processing
			 * from this moment. */
			break;
		}

		/* The block is done when the byte address is past the last
		 * byte of the block, or when it is on the last byte and the
		 * bit address has reached `bit_size`. */
		if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 ||
		    (rar->bits.in_addr == rar->cstate.cur_block_size - 1 &&
		    rar->bits.bit_addr >= bit_size))
		{
			/* If the program counter is here, it means the
			 * function has finished processing the block. */
			rar->cstate.block_parsing_finished = 1;
			break;
		}

		/* Decode the next symbol using the `ld` table. */
		if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) {
			return ARCHIVE_EOF;
		}

		/* Num holds a decompression literal, or 'command code'.
		 *
		 * - Values lower than 256 are just bytes. Those codes
		 *   can be stored in the output buffer directly.
		 *
		 * - Code 256 defines a new filter, which is later used to
		 *   transform the data block accordingly to the filter type.
		 *   The data block needs to be fully uncompressed first.
		 *
		 * - Code 257 repeats the previous copy: it uses the last
		 *   length together with the first entry of the distance
		 *   cache.
		 *
		 * - Codes 258..261 select one of the cached distances; the
		 *   length is decoded from the stream.
		 *
		 * - Codes 262 and above carry a length, and are followed by
		 *   an encoded distance. Both describe a pattern that should
		 *   be copied from an already uncompressed chunk of data.
		 */

		if(num < 256) {
			/* Directly store the byte. */
			int64_t write_idx = rar->cstate.solid_offset +
			    rar->cstate.write_ptr++;

			rar->cstate.window_buf[write_idx & cmask] =
			    (uint8_t) num;
			continue;
		} else if(num >= 262) {
			uint16_t dist_slot;
			int len = decode_code_length(a, rar, p, num - 262),
				dbits,
				dist = 1;

			if(len == -1) {
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_PROGRAMMER,
				    "Failed to decode the code length");

				return ARCHIVE_FATAL;
			}

			/* The distance slot is decoded using the `dd`
			 * table. */
			if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p,
			    &dist_slot))
			{
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_PROGRAMMER,
				    "Failed to decode the distance slot");

				return ARCHIVE_FATAL;
			}

			/* Slots 0..3 encode the distance directly. Bigger
			 * slots provide the base of the distance and the
			 * number of extra bits (`dbits`) that follow. */
			if(dist_slot < 4) {
				dbits = 0;
				dist += dist_slot;
			} else {
				dbits = dist_slot / 2 - 1;

				/* Cast to uint32_t will make sure the shift
				 * left operation won't produce undefined
				 * result. Then, the uint32_t type will
				 * be implicitly casted to int. */
				dist += (uint32_t) (2 |
				    (dist_slot & 1)) << dbits;
			}

			if(dbits > 0) {
				if(dbits >= 4) {
					uint32_t add = 0;
					uint16_t low_dist;

					/* With 4 or more extra bits, the
					 * lowest 4 bits come from the `ldd`
					 * table, and the remaining
					 * `dbits - 4` upper bits are read
					 * directly from the stream. */
					if(dbits > 4) {
						if(ARCHIVE_OK != (ret = read_bits_32(
						    a, rar, p, &add))) {
							/* Return EOF if we
							 * can't read more
							 * data. */
							return ret;
						}

						skip_bits(rar, dbits - 4);
						/* Keep the top `dbits - 4`
						 * bits of the 32-bit value
						 * and make room for the low
						 * 4 bits. */
						add = (add >> (
						    36 - dbits)) << 4;
						dist += add;
					}

					if(ARCHIVE_OK != decode_number(a,
					    &rar->cstate.ldd, p, &low_dist))
					{
						archive_set_error(&a->archive,
						    ARCHIVE_ERRNO_PROGRAMMER,
						    "Failed to decode the "
						    "distance slot");

						return ARCHIVE_FATAL;
					}

					/* Refuse distances that would
					 * overflow `int` after adding
					 * `low_dist`. */
					if(dist >= INT_MAX - low_dist - 1) {
						/* This only happens in
						 * invalid archives. */
						archive_set_error(&a->archive,
						    ARCHIVE_ERRNO_FILE_FORMAT,
						    "Distance pointer "
						    "overflow");
						return ARCHIVE_FATAL;
					}

					dist += low_dist;
				} else {
					/* dbits is one of [1,2,3] */
					int add;

					if(ARCHIVE_OK != (ret = read_consume_bits(a, rar,
					    p, dbits, &add))) {
						/* Return EOF if we can't read
						 * more data. */
						return ret;
					}

					dist += add;
				}
			}

			/* Longer distances increase the copy length: one
			 * extra byte for each threshold that `dist`
			 * exceeds. */
			if(dist > 0x100) {
				len++;

				if(dist > 0x2000) {
					len++;

					if(dist > 0x40000) {
						len++;
					}
				}
			}

			/* Remember the distance and the length, so the
			 * following 257..261 codes can reuse them. */
			dist_cache_push(rar, dist);
			rar->cstate.last_len = len;

			if(ARCHIVE_OK != copy_string(a, len, dist))
				return ARCHIVE_FATAL;

			continue;
		} else if(num == 256) {
			/* Create a filter. */
			ret = parse_filter(a, p);
			if(ret != ARCHIVE_OK)
				return ret;

			continue;
		} else if(num == 257) {
			/* Repeat the previous copy operation; nothing is
			 * done when there was no previous length. */
			if(rar->cstate.last_len != 0) {
				if(ARCHIVE_OK != copy_string(a,
				    rar->cstate.last_len,
				    rar->cstate.dist_cache[0]))
				{
					return ARCHIVE_FATAL;
				}
			}

			continue;
		} else {
			/* num < 262, so `idx` is one of [0,1,2,3]. */
			const int idx = num - 258;
			const int dist = dist_cache_touch(rar, idx);

			uint16_t len_slot;
			int len;

			/* The length slot is decoded using the `rd`
			 * table. */
			if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p,
			    &len_slot)) {
				return ARCHIVE_FATAL;
			}

			len = decode_code_length(a, rar, p, len_slot);
			if (len == -1) {
				return ARCHIVE_FATAL;
			}

			rar->cstate.last_len = len;

			if(ARCHIVE_OK != copy_string(a, len, dist))
				return ARCHIVE_FATAL;

			continue;
		}
	}

	return ARCHIVE_OK;
}
3302
3303 /* Binary search for the RARv5 signature. */
scan_for_signature(struct archive_read * a)3304 static int scan_for_signature(struct archive_read* a) {
3305 const uint8_t* p;
3306 const int chunk_size = 512;
3307 ssize_t i;
3308 char signature[sizeof(rar5_signature_xor)];
3309
3310 /* If we're here, it means we're on an 'unknown territory' data.
3311 * There's no indication what kind of data we're reading here.
3312 * It could be some text comment, any kind of binary data,
3313 * digital sign, dragons, etc.
3314 *
3315 * We want to find a valid RARv5 magic header inside this unknown
3316 * data. */
3317
3318 /* Is it possible in libarchive to just skip everything until the
3319 * end of the file? If so, it would be a better approach than the
3320 * current implementation of this function. */
3321
3322 rar5_signature(signature);
3323
3324 while(1) {
3325 if(!read_ahead(a, chunk_size, &p))
3326 return ARCHIVE_EOF;
3327
3328 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor);
3329 i++) {
3330 if(memcmp(&p[i], signature,
3331 sizeof(rar5_signature_xor)) == 0) {
3332 /* Consume the number of bytes we've used to
3333 * search for the signature, as well as the
3334 * number of bytes used by the signature
3335 * itself. After this we should be standing
3336 * on a valid base block header. */
3337 (void) consume(a,
3338 i + sizeof(rar5_signature_xor));
3339 return ARCHIVE_OK;
3340 }
3341 }
3342
3343 consume(a, chunk_size);
3344 }
3345
3346 return ARCHIVE_FATAL;
3347 }
3348
3349 /* This function will switch the multivolume archive file to another file,
3350 * i.e. from part03 to part 04. */
advance_multivolume(struct archive_read * a)3351 static int advance_multivolume(struct archive_read* a) {
3352 int lret;
3353 struct rar5* rar = get_context(a);
3354
3355 /* A small state machine that will skip unnecessary data, needed to
3356 * switch from one multivolume to another. Such skipping is needed if
3357 * we want to be an stream-oriented (instead of file-oriented)
3358 * unpacker.
3359 *
3360 * The state machine starts with `rar->main.endarc` == 0. It also
3361 * assumes that current stream pointer points to some base block
3362 * header.
3363 *
3364 * The `endarc` field is being set when the base block parsing
3365 * function encounters the 'end of archive' marker.
3366 */
3367
3368 while(1) {
3369 if(rar->main.endarc == 1) {
3370 int looping = 1;
3371
3372 rar->main.endarc = 0;
3373
3374 while(looping) {
3375 lret = skip_base_block(a);
3376 switch(lret) {
3377 case ARCHIVE_RETRY:
3378 /* Continue looping. */
3379 break;
3380 case ARCHIVE_OK:
3381 /* Break loop. */
3382 looping = 0;
3383 break;
3384 default:
3385 /* Forward any errors to the
3386 * caller. */
3387 return lret;
3388 }
3389 }
3390
3391 break;
3392 } else {
3393 /* Skip current base block. In order to properly skip
3394 * it, we really need to simply parse it and discard
3395 * the results. */
3396
3397 lret = skip_base_block(a);
3398 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED)
3399 return lret;
3400
3401 /* The `skip_base_block` function tells us if we
3402 * should continue with skipping, or we should stop
3403 * skipping. We're trying to skip everything up to
3404 * a base FILE block. */
3405
3406 if(lret != ARCHIVE_RETRY) {
3407 /* If there was an error during skipping, or we
3408 * have just skipped a FILE base block... */
3409
3410 if(rar->main.endarc == 0) {
3411 return lret;
3412 } else {
3413 continue;
3414 }
3415 }
3416 }
3417 }
3418
3419 return ARCHIVE_OK;
3420 }
3421
3422 /* Merges the partial block from the first multivolume archive file, and
3423 * partial block from the second multivolume archive file. The result is
3424 * a chunk of memory containing the whole block, and the stream pointer
3425 * is advanced to the next block in the second multivolume archive file. */
merge_block(struct archive_read * a,ssize_t block_size,const uint8_t ** p)3426 static int merge_block(struct archive_read* a, ssize_t block_size,
3427 const uint8_t** p)
3428 {
3429 struct rar5* rar = get_context(a);
3430 ssize_t cur_block_size, partial_offset = 0;
3431 const uint8_t* lp;
3432 int ret;
3433
3434 if(rar->merge_mode) {
3435 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3436 "Recursive merge is not allowed");
3437
3438 return ARCHIVE_FATAL;
3439 }
3440
3441 /* Set a flag that we're in the switching mode. */
3442 rar->cstate.switch_multivolume = 1;
3443
3444 /* Reallocate the memory which will hold the whole block. */
3445 if(rar->vol.push_buf)
3446 free((void*) rar->vol.push_buf);
3447
3448 /* Increasing the allocation block by 8 is due to bit reading functions,
3449 * which are using additional 2 or 4 bytes. Allocating the block size
3450 * by exact value would make bit reader perform reads from invalid
3451 * memory block when reading the last byte from the buffer. */
3452 rar->vol.push_buf = malloc(block_size + 8);
3453 if(!rar->vol.push_buf) {
3454 archive_set_error(&a->archive, ENOMEM,
3455 "Can't allocate memory for a merge block buffer.");
3456 return ARCHIVE_FATAL;
3457 }
3458
3459 /* Valgrind complains if the extension block for bit reader is not
3460 * initialized, so initialize it. */
3461 memset(&rar->vol.push_buf[block_size], 0, 8);
3462
3463 /* A single block can span across multiple multivolume archive files,
3464 * so we use a loop here. This loop will consume enough multivolume
3465 * archive files until the whole block is read. */
3466
3467 while(1) {
3468 /* Get the size of current block chunk in this multivolume
3469 * archive file and read it. */
3470 cur_block_size = rar5_min(rar->file.bytes_remaining,
3471 block_size - partial_offset);
3472
3473 if(cur_block_size == 0) {
3474 archive_set_error(&a->archive,
3475 ARCHIVE_ERRNO_FILE_FORMAT,
3476 "Encountered block size == 0 during block merge");
3477 return ARCHIVE_FATAL;
3478 }
3479
3480 if(!read_ahead(a, cur_block_size, &lp))
3481 return ARCHIVE_EOF;
3482
3483 /* Sanity check; there should never be a situation where this
3484 * function reads more data than the block's size. */
3485 if(partial_offset + cur_block_size > block_size) {
3486 archive_set_error(&a->archive,
3487 ARCHIVE_ERRNO_PROGRAMMER,
3488 "Consumed too much data when merging blocks.");
3489 return ARCHIVE_FATAL;
3490 }
3491
3492 /* Merge previous block chunk with current block chunk,
3493 * or create first block chunk if this is our first
3494 * iteration. */
3495 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size);
3496
3497 /* Advance the stream read pointer by this block chunk size. */
3498 if(ARCHIVE_OK != consume(a, cur_block_size))
3499 return ARCHIVE_EOF;
3500
3501 /* Update the pointers. `partial_offset` contains information
3502 * about the sum of merged block chunks. */
3503 partial_offset += cur_block_size;
3504 rar->file.bytes_remaining -= cur_block_size;
3505
3506 /* If `partial_offset` is the same as `block_size`, this means
3507 * we've merged all block chunks and we have a valid full
3508 * block. */
3509 if(partial_offset == block_size) {
3510 break;
3511 }
3512
3513 /* If we don't have any bytes to read, this means we should
3514 * switch to another multivolume archive file. */
3515 if(rar->file.bytes_remaining == 0) {
3516 rar->merge_mode++;
3517 ret = advance_multivolume(a);
3518 rar->merge_mode--;
3519 if(ret != ARCHIVE_OK) {
3520 return ret;
3521 }
3522 }
3523 }
3524
3525 *p = rar->vol.push_buf;
3526
3527 /* If we're here, we can resume unpacking by processing the block
3528 * pointed to by the `*p` memory pointer. */
3529
3530 return ARCHIVE_OK;
3531 }
3532
process_block(struct archive_read * a)3533 static int process_block(struct archive_read* a) {
3534 const uint8_t* p;
3535 struct rar5* rar = get_context(a);
3536 int ret;
3537
3538 /* If we don't have any data to be processed, this most probably means
3539 * we need to switch to the next volume. */
3540 if(rar->main.volume && rar->file.bytes_remaining == 0) {
3541 ret = advance_multivolume(a);
3542 if(ret != ARCHIVE_OK)
3543 return ret;
3544 }
3545
3546 if(rar->cstate.block_parsing_finished) {
3547 ssize_t block_size;
3548 ssize_t to_skip;
3549 ssize_t cur_block_size;
3550
3551 /* The header size won't be bigger than 6 bytes. */
3552 if(!read_ahead(a, 6, &p)) {
3553 /* Failed to prefetch data block header. */
3554 return ARCHIVE_EOF;
3555 }
3556
3557 /*
3558 * Read block_size by parsing block header. Validate the header
3559 * by calculating CRC byte stored inside the header. Size of
3560 * the header is not constant (block size can be stored either
3561 * in 1 or 2 bytes), that's why block size is left out from the
3562 * `compressed_block_header` structure and returned by
3563 * `parse_block_header` as the second argument. */
3564
3565 ret = parse_block_header(a, p, &block_size,
3566 &rar->last_block_hdr);
3567 if(ret != ARCHIVE_OK) {
3568 return ret;
3569 }
3570
3571 /* Skip block header. Next data is huffman tables,
3572 * if present. */
3573 to_skip = sizeof(struct compressed_block_header) +
3574 bf_byte_count(&rar->last_block_hdr) + 1;
3575
3576 if(ARCHIVE_OK != consume(a, to_skip))
3577 return ARCHIVE_EOF;
3578
3579 rar->file.bytes_remaining -= to_skip;
3580
3581 /* The block size gives information about the whole block size,
3582 * but the block could be stored in split form when using
3583 * multi-volume archives. In this case, the block size will be
3584 * bigger than the actual data stored in this file. Remaining
3585 * part of the data will be in another file. */
3586
3587 cur_block_size =
3588 rar5_min(rar->file.bytes_remaining, block_size);
3589
3590 if(block_size > rar->file.bytes_remaining) {
3591 /* If current blocks' size is bigger than our data
3592 * size, this means we have a multivolume archive.
3593 * In this case, skip all base headers until the end
3594 * of the file, proceed to next "partXXX.rar" volume,
3595 * find its signature, skip all headers up to the first
3596 * FILE base header, and continue from there.
3597 *
3598 * Note that `merge_block` will update the `rar`
3599 * context structure quite extensively. */
3600
3601 ret = merge_block(a, block_size, &p);
3602 if(ret != ARCHIVE_OK) {
3603 return ret;
3604 }
3605
3606 cur_block_size = block_size;
3607
3608 /* Current stream pointer should be now directly
3609 * *after* the block that spanned through multiple
3610 * archive files. `p` pointer should have the data of
3611 * the *whole* block (merged from partial blocks
3612 * stored in multiple archives files). */
3613 } else {
3614 rar->cstate.switch_multivolume = 0;
3615
3616 /* Read the whole block size into memory. This can take
3617 * up to 8 megabytes of memory in theoretical cases.
3618 * Might be worth to optimize this and use a standard
3619 * chunk of 4kb's. */
3620 if(!read_ahead(a, 4 + cur_block_size, &p)) {
3621 /* Failed to prefetch block data. */
3622 return ARCHIVE_EOF;
3623 }
3624 }
3625
3626 rar->cstate.block_buf = p;
3627 rar->cstate.cur_block_size = cur_block_size;
3628 rar->cstate.block_parsing_finished = 0;
3629
3630 rar->bits.in_addr = 0;
3631 rar->bits.bit_addr = 0;
3632
3633 if(bf_is_table_present(&rar->last_block_hdr)) {
3634 /* Load Huffman tables. */
3635 ret = parse_tables(a, rar, p);
3636 if(ret != ARCHIVE_OK) {
3637 /* Error during decompression of Huffman
3638 * tables. */
3639 return ret;
3640 }
3641 }
3642 } else {
3643 /* Block parsing not finished, reuse previous memory buffer. */
3644 p = rar->cstate.block_buf;
3645 }
3646
3647 /* Uncompress the block, or a part of it, depending on how many bytes
3648 * will be generated by uncompressing the block.
3649 *
3650 * In case too many bytes will be generated, calling this function
3651 * again will resume the uncompression operation. */
3652 ret = do_uncompress_block(a, p);
3653 if(ret != ARCHIVE_OK) {
3654 return ret;
3655 }
3656
3657 if(rar->cstate.block_parsing_finished &&
3658 rar->cstate.switch_multivolume == 0 &&
3659 rar->cstate.cur_block_size > 0)
3660 {
3661 /* If we're processing a normal block, consume the whole
3662 * block. We can do this because we've already read the whole
3663 * block to memory. */
3664 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size))
3665 return ARCHIVE_FATAL;
3666
3667 rar->file.bytes_remaining -= rar->cstate.cur_block_size;
3668 } else if(rar->cstate.switch_multivolume) {
3669 /* Don't consume the block if we're doing multivolume
3670 * processing. The volume switching function will consume
3671 * the proper count of bytes instead. */
3672 rar->cstate.switch_multivolume = 0;
3673 }
3674
3675 return ARCHIVE_OK;
3676 }
3677
3678 /* Pops the `buf`, `size` and `offset` from the "data ready" stack.
3679 *
3680 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY
3681 * when there is no data on the stack. */
use_data(struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3682 static int use_data(struct rar5* rar, const void** buf, size_t* size,
3683 int64_t* offset)
3684 {
3685 int i;
3686
3687 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3688 struct data_ready *d = &rar->cstate.dready[i];
3689
3690 if(d->used) {
3691 if(buf) *buf = d->buf;
3692 if(size) *size = d->size;
3693 if(offset) *offset = d->offset;
3694
3695 d->used = 0;
3696 return ARCHIVE_OK;
3697 }
3698 }
3699
3700 return ARCHIVE_RETRY;
3701 }
3702
clear_data_ready_stack(struct rar5 * rar)3703 static void clear_data_ready_stack(struct rar5* rar) {
3704 memset(&rar->cstate.dready, 0, sizeof(rar->cstate.dready));
3705 }
3706
3707 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready
3708 * FIFO stack. Those values will be popped from this stack by the `use_data`
3709 * function. */
push_data_ready(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,size_t size,int64_t offset)3710 static int push_data_ready(struct archive_read* a, struct rar5* rar,
3711 const uint8_t* buf, size_t size, int64_t offset)
3712 {
3713 int i;
3714
3715 /* Don't push if we're in skip mode. This is needed because solid
3716 * streams need full processing even if we're skipping data. After
3717 * fully processing the stream, we need to discard the generated bytes,
3718 * because we're interested only in the side effect: building up the
3719 * internal window circular buffer. This window buffer will be used
3720 * later during unpacking of requested data. */
3721 if(rar->skip_mode)
3722 return ARCHIVE_OK;
3723
3724 /* Sanity check. */
3725 if(offset != rar->file.last_offset + rar->file.last_size) {
3726 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3727 "Sanity check error: output stream is not continuous");
3728 return ARCHIVE_FATAL;
3729 }
3730
3731 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3732 struct data_ready* d = &rar->cstate.dready[i];
3733 if(!d->used) {
3734 d->used = 1;
3735 d->buf = buf;
3736 d->size = size;
3737 d->offset = offset;
3738
3739 /* These fields are used only in sanity checking. */
3740 rar->file.last_offset = offset;
3741 rar->file.last_size = size;
3742
3743 /* Calculate the checksum of this new block before
3744 * submitting data to libarchive's engine. */
3745 update_crc(rar, d->buf, d->size);
3746
3747 return ARCHIVE_OK;
3748 }
3749 }
3750
3751 /* Program counter will reach this code if the `rar->cstate.data_ready`
3752 * stack will be filled up so that no new entries will be allowed. The
3753 * code shouldn't allow such situation to occur. So we treat this case
3754 * as an internal error. */
3755
3756 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3757 "Error: premature end of data_ready stack");
3758 return ARCHIVE_FATAL;
3759 }
3760
3761 /* This function uncompresses the data that is stored in the <FILE> base
3762 * block.
3763 *
3764 * The FILE base block looks like this:
3765 *
3766 * <header><huffman tables><block_1><block_2>...<block_n>
3767 *
3768 * The <header> is a block header, that is parsed in parse_block_header().
3769 * It's a "compressed_block_header" structure, containing metadata needed
3770 * to know when we should stop looking for more <block_n> blocks.
3771 *
3772 * <huffman tables> contain data needed to set up the huffman tables, needed
3773 * for the actual decompression.
3774 *
3775 * Each <block_n> consists of series of literals:
3776 *
3777 * <literal><literal><literal>...<literal>
3778 *
3779 * Those literals generate the uncompression data. They operate on a circular
3780 * buffer, sometimes writing raw data into it, sometimes referencing
3781 * some previous data inside this buffer, and sometimes declaring a filter
3782 * that will need to be executed on the data stored in the circular buffer.
3783 * It all depends on the literal that is used.
3784 *
3785 * Sometimes blocks produce output data, sometimes they don't. For example, for
3786 * some huge files that use lots of filters, sometimes a block is filled with
3787 * only filter declaration literals. Such blocks won't produce any data in the
3788 * circular buffer.
3789 *
3790 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte,
3791 * because a literal can reference previously decompressed data. For example,
3792 * there can be a literal that says: 'append a byte 0xFE here', and after
3793 * it another literal can say 'append 1 megabyte of data from circular buffer
3794 * offset 0x12345'. This is how RAR format handles compressing repeated
3795 * patterns.
3796 *
3797 * The RAR compressor creates those literals and the actual efficiency of
3798 * compression depends on what those literals are. The literals can also
3799 * be seen as a kind of a non-turing-complete virtual machine that simply
3800 * tells the decompressor what it should do.
3801 * */
3802
do_uncompress_file(struct archive_read * a)3803 static int do_uncompress_file(struct archive_read* a) {
3804 struct rar5* rar = get_context(a);
3805 int ret;
3806 int64_t max_end_pos;
3807
3808 if(!rar->cstate.initialized) {
3809 /* Don't perform full context reinitialization if we're
3810 * processing a solid archive. */
3811 if(!rar->main.solid || !rar->cstate.window_buf) {
3812 init_unpack(rar);
3813 }
3814
3815 rar->cstate.initialized = 1;
3816 }
3817
3818 /* Don't allow extraction if window_size is invalid. */
3819 if(rar->cstate.window_size == 0) {
3820 archive_set_error(&a->archive,
3821 ARCHIVE_ERRNO_FILE_FORMAT,
3822 "Invalid window size declaration in this file");
3823
3824 /* This should never happen in valid files. */
3825 return ARCHIVE_FATAL;
3826 }
3827
3828 if(rar->cstate.all_filters_applied == 1) {
3829 /* We use while(1) here, but standard case allows for just 1
3830 * iteration. The loop will iterate if process_block() didn't
3831 * generate any data at all. This can happen if the block
3832 * contains only filter definitions (this is common in big
3833 * files). */
3834 while(1) {
3835 ret = process_block(a);
3836 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL)
3837 return ret;
3838
3839 if(rar->cstate.last_write_ptr ==
3840 rar->cstate.write_ptr) {
3841 /* The block didn't generate any new data,
3842 * so just process a new block if this one
3843 * wasn't the last block in the file. */
3844 if (bf_is_last_block(&rar->last_block_hdr)) {
3845 return ARCHIVE_EOF;
3846 }
3847
3848 continue;
3849 }
3850
3851 /* The block has generated some new data, so break
3852 * the loop. */
3853 break;
3854 }
3855 }
3856
3857 /* Try to run filters. If filters won't be applied, it means that
3858 * insufficient data was generated. */
3859 ret = apply_filters(a);
3860 if(ret == ARCHIVE_RETRY) {
3861 return ARCHIVE_OK;
3862 } else if(ret == ARCHIVE_FATAL) {
3863 return ARCHIVE_FATAL;
3864 }
3865
3866 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */
3867
3868 if(cdeque_size(&rar->cstate.filters) > 0) {
3869 /* Check if we can write something before hitting first
3870 * filter. */
3871 struct filter_info* flt;
3872
3873 /* Get the block_start offset from the first filter. */
3874 if(CDE_OK != cdeque_front(&rar->cstate.filters,
3875 cdeque_filter_p(&flt)))
3876 {
3877 archive_set_error(&a->archive,
3878 ARCHIVE_ERRNO_PROGRAMMER,
3879 "Can't read first filter");
3880 return ARCHIVE_FATAL;
3881 }
3882
3883 max_end_pos = rar5_min(flt->block_start,
3884 rar->cstate.write_ptr);
3885 } else {
3886 /* There are no filters defined, or all filters were applied.
3887 * This means we can just store the data without any
3888 * postprocessing. */
3889 max_end_pos = rar->cstate.write_ptr;
3890 }
3891
3892 if(max_end_pos == rar->cstate.last_write_ptr) {
3893 /* We can't write anything yet. The block uncompression
3894 * function did not generate enough data, and no filter can be
3895 * applied. At the same time we don't have any data that can be
3896 * stored without filter postprocessing. This means we need to
3897 * wait for more data to be generated, so we can apply the
3898 * filters.
3899 *
3900 * Signal the caller that we need more data to be able to do
3901 * anything.
3902 */
3903 return ARCHIVE_RETRY;
3904 } else {
3905 /* We can write the data before hitting the first filter.
3906 * So let's do it. The push_window_data() function will
3907 * effectively return the selected data block to the user
3908 * application. */
3909 push_window_data(a, rar, rar->cstate.last_write_ptr,
3910 max_end_pos);
3911 rar->cstate.last_write_ptr = max_end_pos;
3912 }
3913
3914 return ARCHIVE_OK;
3915 }
3916
uncompress_file(struct archive_read * a)3917 static int uncompress_file(struct archive_read* a) {
3918 int ret;
3919
3920 while(1) {
3921 /* Sometimes the uncompression function will return a
3922 * 'retry' signal. If this will happen, we have to retry
3923 * the function. */
3924 ret = do_uncompress_file(a);
3925 if(ret != ARCHIVE_RETRY)
3926 return ret;
3927 }
3928 }
3929
3930
do_unstore_file(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3931 static int do_unstore_file(struct archive_read* a,
3932 struct rar5* rar, const void** buf, size_t* size, int64_t* offset)
3933 {
3934 size_t to_read;
3935 const uint8_t* p;
3936
3937 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 &&
3938 rar->generic.split_after > 0)
3939 {
3940 int ret;
3941
3942 rar->cstate.switch_multivolume = 1;
3943 ret = advance_multivolume(a);
3944 rar->cstate.switch_multivolume = 0;
3945
3946 if(ret != ARCHIVE_OK) {
3947 /* Failed to advance to next multivolume archive
3948 * file. */
3949 return ret;
3950 }
3951 }
3952
3953 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024);
3954 if(to_read == 0) {
3955 return ARCHIVE_EOF;
3956 }
3957
3958 if(!read_ahead(a, to_read, &p)) {
3959 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3960 "I/O error when unstoring file");
3961 return ARCHIVE_FATAL;
3962 }
3963
3964 if(ARCHIVE_OK != consume(a, to_read)) {
3965 return ARCHIVE_EOF;
3966 }
3967
3968 if(buf) *buf = p;
3969 if(size) *size = to_read;
3970 if(offset) *offset = rar->cstate.last_unstore_ptr;
3971
3972 rar->file.bytes_remaining -= to_read;
3973 rar->cstate.last_unstore_ptr += to_read;
3974
3975 update_crc(rar, p, to_read);
3976 return ARCHIVE_OK;
3977 }
3978
do_unpack(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3979 static int do_unpack(struct archive_read* a, struct rar5* rar,
3980 const void** buf, size_t* size, int64_t* offset)
3981 {
3982 enum COMPRESSION_METHOD {
3983 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4,
3984 BEST = 5
3985 };
3986
3987 if(rar->file.service > 0) {
3988 return do_unstore_file(a, rar, buf, size, offset);
3989 } else {
3990 switch(rar->cstate.method) {
3991 case STORE:
3992 return do_unstore_file(a, rar, buf, size,
3993 offset);
3994 case FASTEST:
3995 /* fallthrough */
3996 case FAST:
3997 /* fallthrough */
3998 case NORMAL:
3999 /* fallthrough */
4000 case GOOD:
4001 /* fallthrough */
4002 case BEST:
4003 /* No data is returned here. But because a sparse-file aware
4004 * caller (like archive_read_data_into_fd) may treat zero-size
4005 * as a sparse file block, we need to update the offset
4006 * accordingly. At this point the decoder doesn't have any
4007 * pending uncompressed data blocks, so the current position in
4008 * the output file should be last_write_ptr. */
4009 if (offset) *offset = rar->cstate.last_write_ptr;
4010 return uncompress_file(a);
4011 default:
4012 archive_set_error(&a->archive,
4013 ARCHIVE_ERRNO_FILE_FORMAT,
4014 "Compression method not supported: 0x%x",
4015 (unsigned int)rar->cstate.method);
4016
4017 return ARCHIVE_FATAL;
4018 }
4019 }
4020
4021 #if !defined WIN32
4022 /* Not reached. */
4023 return ARCHIVE_OK;
4024 #endif
4025 }
4026
verify_checksums(struct archive_read * a)4027 static int verify_checksums(struct archive_read* a) {
4028 int verify_crc;
4029 struct rar5* rar = get_context(a);
4030
4031 /* Check checksums only when actually unpacking the data. There's no
4032 * need to calculate checksum when we're skipping data in solid archives
4033 * (skipping in solid archives is the same thing as unpacking compressed
4034 * data and discarding the result). */
4035
4036 if(!rar->skip_mode) {
4037 /* Always check checksums if we're not in skip mode */
4038 verify_crc = 1;
4039 } else {
4040 /* We can override the logic above with a compile-time option
4041 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging,
4042 * and it will check checksums of unpacked data even when
4043 * we're skipping it. */
4044
4045 #if defined CHECK_CRC_ON_SOLID_SKIP
4046 /* Debug case */
4047 verify_crc = 1;
4048 #else
4049 /* Normal case */
4050 verify_crc = 0;
4051 #endif
4052 }
4053
4054 if(verify_crc) {
4055 /* During unpacking, on each unpacked block we're calling the
4056 * update_crc() function. Since we are here, the unpacking
4057 * process is already over and we can check if calculated
4058 * checksum (CRC32 or BLAKE2sp) is the same as what is stored
4059 * in the archive. */
4060 if(rar->file.stored_crc32 > 0) {
4061 /* Check CRC32 only when the file contains a CRC32
4062 * value for this file. */
4063
4064 if(rar->file.calculated_crc32 !=
4065 rar->file.stored_crc32) {
4066 /* Checksums do not match; the unpacked file
4067 * is corrupted. */
4068
4069 DEBUG_CODE {
4070 printf("Checksum error: CRC32 "
4071 "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n",
4072 rar->file.calculated_crc32,
4073 rar->file.stored_crc32);
4074 }
4075
4076 #ifndef DONT_FAIL_ON_CRC_ERROR
4077 archive_set_error(&a->archive,
4078 ARCHIVE_ERRNO_FILE_FORMAT,
4079 "Checksum error: CRC32");
4080 return ARCHIVE_FATAL;
4081 #endif
4082 } else {
4083 DEBUG_CODE {
4084 printf("Checksum OK: CRC32 "
4085 "(%08" PRIx32 "/%08" PRIx32 ")\n",
4086 rar->file.stored_crc32,
4087 rar->file.calculated_crc32);
4088 }
4089 }
4090 }
4091
4092 if(rar->file.has_blake2 > 0) {
4093 /* BLAKE2sp is an optional checksum algorithm that is
4094 * added to RARv5 archives when using the `-htb` switch
4095 * during creation of archive.
4096 *
4097 * We now finalize the hash calculation by calling the
4098 * `final` function. This will generate the final hash
4099 * value we can use to compare it with the BLAKE2sp
4100 * checksum that is stored in the archive.
4101 *
4102 * The return value of this `final` function is not
4103 * very helpful, as it guards only against improper use.
4104 * This is why we're explicitly ignoring it. */
4105
4106 uint8_t b2_buf[32];
4107 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32);
4108
4109 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) {
4110 #ifndef DONT_FAIL_ON_CRC_ERROR
4111 archive_set_error(&a->archive,
4112 ARCHIVE_ERRNO_FILE_FORMAT,
4113 "Checksum error: BLAKE2");
4114
4115 return ARCHIVE_FATAL;
4116 #endif
4117 }
4118 }
4119 }
4120
4121 /* Finalization for this file has been successfully completed. */
4122 return ARCHIVE_OK;
4123 }
4124
/* Thin wrapper: forwards to verify_checksums() and returns its result
 * unchanged. */
static int verify_global_checksums(struct archive_read* a) {
	return verify_checksums(a);
}
4128
4129 /*
4130 * Decryption function for the magic signature pattern. Check the comment near
4131 * the `rar5_signature_xor` symbol to read the rationale behind this.
4132 */
rar5_signature(char * buf)4133 static void rar5_signature(char *buf) {
4134 size_t i;
4135
4136 for(i = 0; i < sizeof(rar5_signature_xor); i++) {
4137 buf[i] = rar5_signature_xor[i] ^ 0xA1;
4138 }
4139 }
4140
rar5_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)4141 static int rar5_read_data(struct archive_read *a, const void **buff,
4142 size_t *size, int64_t *offset) {
4143 int ret;
4144 struct rar5* rar = get_context(a);
4145
4146 if (size)
4147 *size = 0;
4148
4149 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
4150 rar->has_encrypted_entries = 0;
4151 }
4152
4153 if (rar->headers_are_encrypted || rar->cstate.data_encrypted) {
4154 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
4155 "Reading encrypted data is not currently supported");
4156 return ARCHIVE_FATAL;
4157 }
4158
4159 if(rar->file.dir > 0) {
4160 /* Don't process any data if this file entry was declared
4161 * as a directory. This is needed, because entries marked as
4162 * directory doesn't have any dictionary buffer allocated, so
4163 * it's impossible to perform any decompression. */
4164 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
4165 "Can't decompress an entry marked as a directory");
4166 return ARCHIVE_FAILED;
4167 }
4168
4169 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) {
4170 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
4171 "Unpacker has written too many bytes");
4172 return ARCHIVE_FATAL;
4173 }
4174
4175 ret = use_data(rar, buff, size, offset);
4176 if(ret == ARCHIVE_OK) {
4177 return ret;
4178 }
4179
4180 if(rar->file.eof == 1) {
4181 return ARCHIVE_EOF;
4182 }
4183
4184 ret = do_unpack(a, rar, buff, size, offset);
4185 if(ret != ARCHIVE_OK) {
4186 return ret;
4187 }
4188
4189 if(rar->file.bytes_remaining == 0 &&
4190 rar->cstate.last_write_ptr == rar->file.unpacked_size)
4191 {
4192 /* If all bytes of current file were processed, run
4193 * finalization.
4194 *
4195 * Finalization will check checksum against proper values. If
4196 * some of the checksums will not match, we'll return an error
4197 * value in the last `archive_read_data` call to signal an error
4198 * to the user. */
4199
4200 rar->file.eof = 1;
4201 return verify_global_checksums(a);
4202 }
4203
4204 return ARCHIVE_OK;
4205 }
4206
rar5_read_data_skip(struct archive_read * a)4207 static int rar5_read_data_skip(struct archive_read *a) {
4208 struct rar5* rar = get_context(a);
4209
4210 if(rar->main.solid && (rar->cstate.data_encrypted == 0)) {
4211 /* In solid archives, instead of skipping the data, we need to
4212 * extract it, and dispose the result. The side effect of this
4213 * operation will be setting up the initial window buffer state
4214 * needed to be able to extract the selected file. Note that
4215 * this is only possible when data withing this solid block is
4216 * not encrypted, in which case we'll skip and fail if the user
4217 * tries to read data. */
4218
4219 int ret;
4220
4221 /* Make sure to process all blocks in the compressed stream. */
4222 while(rar->file.bytes_remaining > 0) {
4223 /* Setting the "skip mode" will allow us to skip
4224 * checksum checks during data skipping. Checking the
4225 * checksum of skipped data isn't really necessary and
4226 * it's only slowing things down.
4227 *
4228 * This is incremented instead of setting to 1 because
4229 * this data skipping function can be called
4230 * recursively. */
4231 rar->skip_mode++;
4232
4233 /* We're disposing 1 block of data, so we use triple
4234 * NULLs in arguments. */
4235 ret = rar5_read_data(a, NULL, NULL, NULL);
4236
4237 /* Turn off "skip mode". */
4238 rar->skip_mode--;
4239
4240 if(ret < 0 || ret == ARCHIVE_EOF) {
4241 /* Propagate any potential error conditions
4242 * to the caller. */
4243 return ret;
4244 }
4245 }
4246 } else {
4247 /* In standard archives, we can just jump over the compressed
4248 * stream. Each file in non-solid archives starts from an empty
4249 * window buffer. */
4250
4251 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) {
4252 return ARCHIVE_FATAL;
4253 }
4254
4255 rar->file.bytes_remaining = 0;
4256 }
4257
4258 return ARCHIVE_OK;
4259 }
4260
rar5_seek_data(struct archive_read * a,int64_t offset,int whence)4261 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset,
4262 int whence)
4263 {
4264 (void) a;
4265 (void) offset;
4266 (void) whence;
4267
4268 /* We're a streaming unpacker, and we don't support seeking. */
4269
4270 return ARCHIVE_FATAL;
4271 }
4272
rar5_cleanup(struct archive_read * a)4273 static int rar5_cleanup(struct archive_read *a) {
4274 struct rar5* rar = get_context(a);
4275
4276 free(rar->cstate.window_buf);
4277 free(rar->cstate.filtered_buf);
4278 clear_data_ready_stack(rar);
4279
4280 free(rar->vol.push_buf);
4281
4282 free_filters(rar);
4283 cdeque_free(&rar->cstate.filters);
4284
4285 free(rar);
4286 a->format->data = NULL;
4287
4288 return ARCHIVE_OK;
4289 }
4290
rar5_capabilities(struct archive_read * a)4291 static int rar5_capabilities(struct archive_read * a) {
4292 (void) a;
4293 return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA
4294 | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA);
4295 }
4296
rar5_has_encrypted_entries(struct archive_read * _a)4297 static int rar5_has_encrypted_entries(struct archive_read *_a) {
4298 if (_a && _a->format) {
4299 struct rar5 *rar = (struct rar5 *)_a->format->data;
4300 if (rar) {
4301 return rar->has_encrypted_entries;
4302 }
4303 }
4304
4305 return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
4306 }
4307
rar5_init(struct rar5 * rar)4308 static int rar5_init(struct rar5* rar) {
4309 memset(rar, 0, sizeof(struct rar5));
4310
4311 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192))
4312 return ARCHIVE_FATAL;
4313
4314 /*
4315 * Until enough data has been read, we cannot tell about
4316 * any encrypted entries yet.
4317 */
4318 rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
4319
4320 return ARCHIVE_OK;
4321 }
4322
archive_read_support_format_rar5(struct archive * _a)4323 int archive_read_support_format_rar5(struct archive *_a) {
4324 struct archive_read* ar;
4325 int ret;
4326 struct rar5* rar;
4327
4328 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar)))
4329 return ret;
4330
4331 rar = malloc(sizeof(*rar));
4332 if(rar == NULL) {
4333 archive_set_error(&ar->archive, ENOMEM,
4334 "Can't allocate rar5 data");
4335 return ARCHIVE_FATAL;
4336 }
4337
4338 if(ARCHIVE_OK != rar5_init(rar)) {
4339 archive_set_error(&ar->archive, ENOMEM,
4340 "Can't allocate rar5 filter buffer");
4341 free(rar);
4342 return ARCHIVE_FATAL;
4343 }
4344
4345 ret = __archive_read_register_format(ar,
4346 rar,
4347 "rar5",
4348 rar5_bid,
4349 rar5_options,
4350 rar5_read_header,
4351 rar5_read_data,
4352 rar5_read_data_skip,
4353 rar5_seek_data,
4354 rar5_cleanup,
4355 rar5_capabilities,
4356 rar5_has_encrypted_entries);
4357
4358 if(ret != ARCHIVE_OK) {
4359 (void) rar5_cleanup(ar);
4360 }
4361
4362 return ret;
4363 }
4364