1 /*-
2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27 #include "archive_endian.h"
28
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <time.h>
33 #ifdef HAVE_ZLIB_H
34 #include <zlib.h> /* crc32 */
35 #endif
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #endif
39
40 #include "archive.h"
41 #ifndef HAVE_ZLIB_H
42 #include "archive_crc32.h"
43 #endif
44
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_ppmd7_private.h"
48 #include "archive_entry_private.h"
49
50 #ifdef HAVE_BLAKE2_H
51 #include <blake2.h>
52 #else
53 #include "archive_blake2.h"
54 #endif
55
56 /*#define CHECK_CRC_ON_SOLID_SKIP*/
57 /*#define DONT_FAIL_ON_CRC_ERROR*/
58 /*#define DEBUG*/
59
/* Generic helper macros.
 *
 * rar5_min/rar5_max evaluate each argument twice, so never pass expressions
 * with side effects (e.g. `i++`).
 *
 * rar5_countof yields the element count of a real array (not a pointer) as
 * a ssize_t. The argument is fully parenthesized so that any array-valued
 * expression can be passed. */
#define rar5_min(a, b) (((a) > (b)) ? (b) : (a))
#define rar5_max(a, b) (((a) > (b)) ? (a) : (b))
#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*(X))))
63
/* Debug helpers, selected by the DEBUG macro.
 *
 * DEBUG_CODE expands to `if(1)` in debug builds and `if(0)` otherwise, so
 * it can prefix a statement or block that should only run when debugging.
 * LOG() prints a "rar5: "-prefixed printf-style message followed by a
 * newline; it has no definition when DEBUG is not defined. */
#if defined DEBUG
#define DEBUG_CODE if(1)
#define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0)
#else
#define DEBUG_CODE if(0)
#endif
70
71 /* Real RAR5 magic number is:
72 *
73 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00
74 * "Rar!→•☺·\x00"
75 *
76 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't
77 * want to put this magic sequence in each binary that uses libarchive, so
78 * applications that scan through the file for this marker won't trigger on
79 * this "false" one.
80 *
81 * The array itself is decrypted in `rar5_init` function. */
82
/* The 8 signature bytes, each XOR'ed with 0xA1 (0x52 ^ 0xA1 == 243, ...). */
static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 };
/* 0x20000 bytes == 128 KiB. The users of this constant are outside this
 * part of the file. */
static const size_t g_unpack_window_size = 0x20000;
85
/* These could have been static const's, but they aren't, because of
 * Visual Studio. */
/* Upper bound for a name length, counted in characters. */
#define MAX_NAME_IN_CHARS 2048
/* Same bound counted in bytes: four bytes are reserved per character
 * (presumably the UTF-8 worst case -- TODO confirm against the users). */
#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS)
90
/* State describing the file entry that is currently being processed.
 * reset_file_context() zeroes the whole structure and then re-initializes
 * `b2state`. */
struct file_header {
	ssize_t bytes_remaining;
	ssize_t unpacked_size;
	int64_t last_offset;    /* Used in sanity checks. */
	int64_t last_size;      /* Used in sanity checks. */

	uint8_t solid : 1;      /* Is this a solid stream? */
	uint8_t service : 1;    /* Is this file a service data? */
	uint8_t eof : 1;        /* Did we finish unpacking the file? */
	uint8_t dir : 1;        /* Is this file entry a directory? */

	/* Optional time fields. */
	uint64_t e_mtime;
	uint64_t e_ctime;
	uint64_t e_atime;
	uint32_t e_unix_ns;

	/* Optional hash fields. */
	uint32_t stored_crc32;
	uint32_t calculated_crc32;
	uint8_t blake2sp[32];
	blake2sp_state b2state; /* (Re)initialized with a 32-byte output
	                           length by reset_file_context(). */
	char has_blake2;

	/* Optional redir fields */
	uint64_t redir_type;    /* Reset to REDIR_TYPE_NONE per file. */
	uint64_t redir_flags;

	ssize_t solid_window_size; /* Used in file format check. */
};
121
/* Numeric identifiers of the EX_* record kinds. The code that reads them is
 * outside this part of the file; presumably these are the record types of
 * a header's "extra" area -- TODO confirm against the parser. */
enum EXTRA {
	EX_CRYPT = 0x01,
	EX_HASH = 0x02,
	EX_HTIME = 0x03,
	EX_VERSION = 0x04,
	EX_REDIR = 0x05,
	EX_UOWNER = 0x06,
	EX_SUBDATA = 0x07
};
131
/* Bit value 1; judging by the name it belongs to file_header.redir_flags
 * and marks a redirection that points at a directory -- TODO confirm. */
#define REDIR_SYMLINK_IS_DIR 1

/* Values for file_header.redir_type. REDIR_TYPE_NONE is the value that
 * reset_file_context() assigns to every new file. */
enum REDIR_TYPE {
	REDIR_TYPE_NONE = 0,
	REDIR_TYPE_UNIXSYMLINK = 1,
	REDIR_TYPE_WINSYMLINK = 2,
	REDIR_TYPE_JUNCTION = 3,
	REDIR_TYPE_HARDLINK = 4,
	REDIR_TYPE_FILECOPY = 5,
};
142
/* Single-bit flags (0x01..0x08) plus a length limit, all related to file
 * owner data. Their users are outside this part of the file; presumably
 * each bit tells which owner field is present in an owner record and
 * OWNER_MAXNAMELEN bounds a user/group name -- TODO confirm. */
#define OWNER_USER_NAME 0x01
#define OWNER_GROUP_NAME 0x02
#define OWNER_USER_UID 0x04
#define OWNER_GROUP_GID 0x08
#define OWNER_MAXNAMELEN 256
148
/* Values for filter_info.type. run_filter() implements FILTER_DELTA,
 * FILTER_E8, FILTER_E8E9 and FILTER_ARM; every other value is rejected
 * there with an "Unsupported filter type" error. */
enum FILTER_TYPE {
	FILTER_DELTA = 0,   /* Generic pattern. */
	FILTER_E8 = 1,      /* Intel x86 code. */
	FILTER_E8E9 = 2,    /* Intel x86 code. */
	FILTER_ARM = 3,     /* ARM code. */
	FILTER_AUDIO = 4,   /* Audio filter, not used in RARv5. */
	FILTER_RGB = 5,     /* Color palette, not used in RARv5. */
	FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */
	FILTER_PPM = 7,     /* Predictive pattern matching, not used in
	                       RARv5. */
	FILTER_NONE = 8,
};
161
/* Descriptor of one filter that has to be applied to a range of unpacked
 * data. Descriptors are heap-allocated by add_new_filter() and queued in
 * comp_state.filters. */
struct filter_info {
	int type;             /* One of enum FILTER_TYPE. */
	int channels;         /* Channel count used by the delta filter. */
	int pos_r;            /* Not referenced in this part of the file. */

	int64_t block_start;  /* Start offset of the filtered range,
	                         compared against comp_state.write_ptr. */
	ssize_t block_length; /* Length of the filtered range in bytes. */
	uint16_t width;       /* Not referenced in this part of the file. */
};
171
/* One pending output chunk (buffer pointer, size and offset), i.e. the
 * triple that push_data_ready() receives. The code that fills and drains
 * these slots is outside this part of the file. */
struct data_ready {
	char used;          /* Presumably non-zero while the slot holds
	                       data -- TODO confirm. */
	const uint8_t* buf;
	size_t size;
	int64_t offset;
};
178
/* Circular double-ended queue over a power-of-2 sized array. All index
 * fields are 16 bits wide. */
struct cdeque {
	uint16_t beg_pos;  /* Index of the front (oldest) element. */
	uint16_t end_pos;  /* Index where the next pushed element goes. */
	uint16_t cap_mask; /* Capacity - 1; used to wrap the indices. */
	uint16_t size;     /* Number of elements currently stored. */
	size_t* arr;       /* Backing array; holds pointers stored as
	                      size_t values. */
};
186
/* One decoding table; comp_state keeps five of them (bd, ld, dd, ldd, rd).
 * The code that builds and reads the table is outside this part of the
 * file, so the field meanings below are not documented here. The 306-entry
 * `decode_num` array matches HUFF_NC, the largest HUFF_* alphabet size. */
struct decode_table {
	uint32_t size;
	int32_t decode_len[16];
	uint32_t decode_pos[16];
	uint32_t quick_bits;
	uint8_t quick_len[1 << 10];
	uint16_t quick_num[1 << 10];
	uint16_t decode_num[306];
};
196
/* Decompression state: status flags, the circular window buffer with its
 * bookkeeping pointers, the decoding tables, the queue of pending filters
 * and the slots for data that is ready to be returned. */
struct comp_state {
	/* Flag used to specify if unpacker needs to reinitialize the
	   uncompression context. */
	uint8_t initialized : 1;

	/* Flag used when applying filters. */
	uint8_t all_filters_applied : 1;

	/* Flag used to skip file context reinitialization, used when unpacker
	   is skipping through different multivolume archives. */
	uint8_t switch_multivolume : 1;

	/* Flag used to specify if unpacker has processed the whole data block
	   or just a part of it. */
	uint8_t block_parsing_finished : 1;

	/* Flag used to indicate that a previous file using this buffer was
	   encrypted, meaning no data in the buffer can be trusted */
	uint8_t data_encrypted : 1;

	/* Padding bits; not used. */
	signed int notused : 3;

	int flags;                  /* Uncompression flags. */
	int method;                 /* Uncompression algorithm method. */
	int version;                /* Uncompression algorithm version. */
	ssize_t window_size;        /* Size of window_buf. */
	uint8_t* window_buf;        /* Circular buffer used during
	                               decompression. */
	uint8_t* filtered_buf;      /* Buffer used when applying filters. */
	const uint8_t* block_buf;   /* Buffer used when merging blocks. */
	ssize_t window_mask;        /* Convenience field; window_size - 1. */
	int64_t write_ptr;          /* This amount of data has been unpacked
	                               in the window buffer. */
	int64_t last_write_ptr;     /* This amount of data has been stored in
	                               the output file. */
	int64_t last_unstore_ptr;   /* Counter of bytes extracted during
	                               unstoring. This is separate from
	                               last_write_ptr because of how SERVICE
	                               base blocks are handled during skipping
	                               in solid multiarchive archives. */
	int64_t solid_offset;       /* Additional offset inside the window
	                               buffer, used in unpacking solid
	                               archives. */
	ssize_t cur_block_size;     /* Size of current data block. */
	int last_len;               /* Flag used in lzss decompression. */

	/* Decode tables used during lzss uncompression. */

#define HUFF_BC 20
	struct decode_table bd;     /* huffman bit lengths */
#define HUFF_NC 306
	struct decode_table ld;     /* literals */
#define HUFF_DC 64
	struct decode_table dd;     /* distances */
#define HUFF_LDC 16
	struct decode_table ldd;    /* lower bits of distances */
#define HUFF_RC 44
	struct decode_table rd;     /* repeating distances */
#define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC)

	/* Circular deque for storing filters. It holds pointers to
	   heap-allocated `struct filter_info` descriptors. */
	struct cdeque filters;
	int64_t last_block_start;   /* Used for sanity checking. */
	ssize_t last_block_length;  /* Used for sanity checking. */

	/* Distance cache used during lzss uncompression. */
	int dist_cache[4];

	/* Data buffer stack. */
	struct data_ready dready[2];
};
268
/* Bit reader state: a byte position inside the current block buffer plus a
 * bit position inside that byte. */
struct bit_reader {
	int8_t bit_addr; /* Current bit pointer inside current byte. */
	int in_addr;     /* Current byte pointer. */
};
274
/* RARv5 block header structure. Use bf_* functions to get values from
 * block_flags_u8 field. I.e. bf_byte_count, etc. */
struct compressed_block_header {
	/* block_flags_u8 contain fields encoded in little-endian bitfield:
	 *
	 * - table present flag (shr 7, and 1),
	 * - last block flag (shr 6, and 1),
	 * - byte_count (shr 3, and 7),
	 * - bit_size (shr 0, and 7).
	 */
	uint8_t block_flags_u8;
	/* Checksum byte of the block header; it is verified outside this
	 * part of the file. */
	uint8_t block_cksum;
};
288
/* RARv5 main header structure. */
struct main_header {
	/* Does the archive contain solid streams? When set,
	 * reset_file_context() accumulates the window offset across files
	 * instead of resetting it. */
	uint8_t solid : 1;

	/* Is this a multi-file archive? */
	uint8_t volume : 1;
	/* Presumably set when the end-of-archive marker was seen -- TODO
	 * confirm against the header parser. */
	uint8_t endarc : 1;
	uint8_t notused : 5;

	/* Volume number; presumably read from the main header -- TODO
	 * confirm. */
	unsigned int vol_no;
};
301
/* Fields of the most recently parsed header that are common to all header
 * kinds. The parser that fills them is outside this part of the file, so
 * the notes below are assumptions to be confirmed there. */
struct generic_header {
	uint8_t split_after : 1;  /* Presumably: data continues in the next
	                             volume. */
	uint8_t split_before : 1; /* Presumably: data continues from the
	                             previous volume. */
	uint8_t padding : 6;
	int size;
	int last_header_id;
};
309
/* Multivolume bookkeeping. Both fields are used outside this part of the
 * file. */
struct multivolume {
	unsigned int expected_vol_no; /* Presumably the volume number that
	                                 must come next -- TODO confirm. */
	uint8_t* push_buf;
};
314
/* Main context structure. One instance is stored in `a->format->data` and
 * is retrieved with get_context(). */
struct rar5 {
	int header_initialized;

	/* Set to 1 if current file is positioned AFTER the magic value
	 * of the archive file. This is used in header reading functions. */
	int skipped_magic;

	/* Set to not zero if we're in skip mode (either by calling
	 * rar5_data_skip function or when skipping over solid streams).
	 * Set to 0 when in extraction mode. This is used during checksum
	 * calculation functions. */
	int skip_mode;

	/* Set to not zero if we're in block merging mode (i.e. when switching
	 * to another file in multivolume archive, last block from 1st archive
	 * needs to be merged with 1st block from 2nd archive). This flag
	 * guards against recursive use of the merging function, which doesn't
	 * support recursive calls. */
	int merge_mode;

	/* An offset to QuickOpen list. This is not supported by this unpacker,
	 * because we're focusing on streaming interface. QuickOpen is designed
	 * to make things quicker for non-stream interfaces, so it's not our
	 * use case. */
	uint64_t qlist_offset;

	/* An offset to additional Recovery data. This is not supported by this
	 * unpacker. Recovery data are additional Reed-Solomon codes that could
	 * be used to calculate bytes that are missing in archive or are
	 * corrupted. */
	uint64_t rr_offset;

	/* Various context variables grouped to different structures. */
	struct generic_header generic;
	struct main_header main;
	struct comp_state cstate;
	struct file_header file;
	struct bit_reader bits;
	struct multivolume vol;

	/* The header of currently processed RARv5 block. Used in main
	 * decompression logic loop. */
	struct compressed_block_header last_block_hdr;

	/*
	 * Custom field to denote that this archive contains encrypted entries
	 */
	int has_encrypted_entries;
	int headers_are_encrypted;
};
366
/* Forward function declarations. The definitions are located further down
 * in the file; the helpers above only need the prototypes. */

static void rar5_signature(char *buf);
static int verify_global_checksums(struct archive_read* a);
static int rar5_read_data_skip(struct archive_read *a);
/* Used by run_filter() and push_data() to submit a (buf, size, offset)
 * chunk; run_filter() treats any result other than ARCHIVE_OK as fatal. */
static int push_data_ready(struct archive_read* a, struct rar5* rar,
    const uint8_t* buf, size_t size, int64_t offset);
/* Called by run_filter() before the filter output buffer is replaced. */
static void clear_data_ready_stack(struct rar5* rar);
375
/* CDE_xxx = Circular Double Ended (Queue) return values.
 *
 * CDE_OK            - success,
 * CDE_ALLOC         - the backing array could not be allocated,
 * CDE_PARAM         - invalid argument (NULL pointer or bad capacity),
 * CDE_OUT_OF_BOUNDS - reading from an empty deque or pushing to a full
 *                     one. */
enum CDE_RETURN_VALUES {
	CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS,
};
380
381 /* Clears the contents of this circular deque. */
cdeque_clear(struct cdeque * d)382 static void cdeque_clear(struct cdeque* d) {
383 d->size = 0;
384 d->beg_pos = 0;
385 d->end_pos = 0;
386 }
387
388 /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32,
389 * 64, 256, etc. When the user will add another item above current capacity,
390 * the circular deque will overwrite the oldest entry. */
cdeque_init(struct cdeque * d,int max_capacity_power_of_2)391 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) {
392 if(d == NULL || max_capacity_power_of_2 == 0)
393 return CDE_PARAM;
394
395 d->cap_mask = max_capacity_power_of_2 - 1;
396 d->arr = NULL;
397
398 if((max_capacity_power_of_2 & d->cap_mask) != 0)
399 return CDE_PARAM;
400
401 cdeque_clear(d);
402 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2);
403
404 return d->arr ? CDE_OK : CDE_ALLOC;
405 }
406
407 /* Return the current size (not capacity) of circular deque `d`. */
cdeque_size(struct cdeque * d)408 static size_t cdeque_size(struct cdeque* d) {
409 return d->size;
410 }
411
412 /* Returns the first element of current circular deque. Note that this function
413 * doesn't perform any bounds checking. If you need bounds checking, use
414 * `cdeque_front()` function instead. */
cdeque_front_fast(struct cdeque * d,void ** value)415 static void cdeque_front_fast(struct cdeque* d, void** value) {
416 *value = (void*) d->arr[d->beg_pos];
417 }
418
419 /* Returns the first element of current circular deque. This function
420 * performs bounds checking. */
cdeque_front(struct cdeque * d,void ** value)421 static int cdeque_front(struct cdeque* d, void** value) {
422 if(d->size > 0) {
423 cdeque_front_fast(d, value);
424 return CDE_OK;
425 } else
426 return CDE_OUT_OF_BOUNDS;
427 }
428
429 /* Pushes a new element into the end of this circular deque object. If current
430 * size will exceed capacity, the oldest element will be overwritten. */
cdeque_push_back(struct cdeque * d,void * item)431 static int cdeque_push_back(struct cdeque* d, void* item) {
432 if(d == NULL)
433 return CDE_PARAM;
434
435 if(d->size == d->cap_mask + 1)
436 return CDE_OUT_OF_BOUNDS;
437
438 d->arr[d->end_pos] = (size_t) item;
439 d->end_pos = (d->end_pos + 1) & d->cap_mask;
440 d->size++;
441
442 return CDE_OK;
443 }
444
445 /* Pops a front element of this circular deque object and returns its value.
446 * This function doesn't perform any bounds checking. */
cdeque_pop_front_fast(struct cdeque * d,void ** value)447 static void cdeque_pop_front_fast(struct cdeque* d, void** value) {
448 *value = (void*) d->arr[d->beg_pos];
449 d->beg_pos = (d->beg_pos + 1) & d->cap_mask;
450 d->size--;
451 }
452
453 /* Pops a front element of this circular deque object and returns its value.
454 * This function performs bounds checking. */
cdeque_pop_front(struct cdeque * d,void ** value)455 static int cdeque_pop_front(struct cdeque* d, void** value) {
456 if(!d || !value)
457 return CDE_PARAM;
458
459 if(d->size == 0)
460 return CDE_OUT_OF_BOUNDS;
461
462 cdeque_pop_front_fast(d, value);
463 return CDE_OK;
464 }
465
/* Converts a `struct filter_info**` into the `void**` that the cdeque
 * getters expect. The conversion goes through size_t. */
static void** cdeque_filter_p(struct filter_info** f) {
	const size_t raw = (size_t) f;

	return (void**) raw;
}
470
/* Converts a `struct filter_info*` into the `void*` that
 * `cdeque_push_back()` expects. The conversion goes through size_t. */
static void* cdeque_filter(struct filter_info* f) {
	const size_t raw = (size_t) f;

	return (void*) raw;
}
475
476 /* Destroys this circular deque object. Deallocates the memory of the
477 * collection buffer, but doesn't deallocate the memory of any pointer passed
478 * to this deque as a value. */
cdeque_free(struct cdeque * d)479 static void cdeque_free(struct cdeque* d) {
480 if(!d)
481 return;
482
483 if(!d->arr)
484 return;
485
486 free(d->arr);
487
488 d->arr = NULL;
489 d->beg_pos = -1;
490 d->end_pos = -1;
491 d->cap_mask = 0;
492 }
493
494 static inline
bf_bit_size(const struct compressed_block_header * hdr)495 uint8_t bf_bit_size(const struct compressed_block_header* hdr) {
496 return hdr->block_flags_u8 & 7;
497 }
498
499 static inline
bf_byte_count(const struct compressed_block_header * hdr)500 uint8_t bf_byte_count(const struct compressed_block_header* hdr) {
501 return (hdr->block_flags_u8 >> 3) & 7;
502 }
503
504 static inline
bf_is_table_present(const struct compressed_block_header * hdr)505 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) {
506 return (hdr->block_flags_u8 >> 7) & 1;
507 }
508
509 static inline
bf_is_last_block(const struct compressed_block_header * hdr)510 uint8_t bf_is_last_block(const struct compressed_block_header* hdr) {
511 return (hdr->block_flags_u8 >> 6) & 1;
512 }
513
get_context(struct archive_read * a)514 static inline struct rar5* get_context(struct archive_read* a) {
515 return (struct rar5*) a->format->data;
516 }
517
/* Convenience functions used by filter implementations. */

/* Copies the byte range [start, end) of the circular buffer `window` into
 * the linear buffer `dst`. `mask` is the window size minus one (the size
 * is a power of 2), and both offsets are wrapped with it.
 *
 * When the wrapped start lies behind the wrapped end, the range crosses
 * the end of the window and is copied in two pieces: from the start up to
 * the window's end, then from the window's beginning. Otherwise a single
 * copy of `end - start` bytes is made. */
static void circular_memcpy(uint8_t* dst, uint8_t* window, const ssize_t mask,
    int64_t start, int64_t end)
{
	const int64_t from = start & mask;
	const int64_t to = end & mask;
	ssize_t head_len, tail_len;

	if(from <= to) {
		memcpy(dst, &window[from], (size_t) (end - start));
		return;
	}

	head_len = mask + 1 - from;
	tail_len = to;

	memcpy(dst, &window[from], head_len);
	memcpy(dst + head_len, window, tail_len);
}
532
read_filter_data(struct rar5 * rar,uint32_t offset)533 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) {
534 uint8_t linear_buf[4];
535 circular_memcpy(linear_buf, rar->cstate.window_buf,
536 rar->cstate.window_mask, offset, offset + 4);
537 return archive_le32dec(linear_buf);
538 }
539
write_filter_data(struct rar5 * rar,uint32_t offset,uint32_t value)540 static void write_filter_data(struct rar5* rar, uint32_t offset,
541 uint32_t value)
542 {
543 archive_le32enc(&rar->cstate.filtered_buf[offset], value);
544 }
545
546 /* Allocates a new filter descriptor and adds it to the filter array. */
add_new_filter(struct rar5 * rar)547 static struct filter_info* add_new_filter(struct rar5* rar) {
548 struct filter_info* f = calloc(1, sizeof(*f));
549
550 if(!f) {
551 return NULL;
552 }
553
554 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f));
555 return f;
556 }
557
run_delta_filter(struct rar5 * rar,struct filter_info * flt)558 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) {
559 int i;
560 ssize_t dest_pos, src_pos = 0;
561
562 for(i = 0; i < flt->channels; i++) {
563 uint8_t prev_byte = 0;
564 for(dest_pos = i;
565 dest_pos < flt->block_length;
566 dest_pos += flt->channels)
567 {
568 uint8_t byte;
569
570 byte = rar->cstate.window_buf[
571 (rar->cstate.solid_offset + flt->block_start +
572 src_pos) & rar->cstate.window_mask];
573
574 prev_byte -= byte;
575 rar->cstate.filtered_buf[dest_pos] = prev_byte;
576 src_pos++;
577 }
578 }
579
580 return ARCHIVE_OK;
581 }
582
run_e8e9_filter(struct rar5 * rar,struct filter_info * flt,int extended)583 static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt,
584 int extended)
585 {
586 const uint32_t file_size = 0x1000000;
587 ssize_t i;
588
589 circular_memcpy(rar->cstate.filtered_buf,
590 rar->cstate.window_buf, rar->cstate.window_mask,
591 rar->cstate.solid_offset + flt->block_start,
592 rar->cstate.solid_offset + flt->block_start + flt->block_length);
593
594 for(i = 0; i < flt->block_length - 4;) {
595 uint8_t b = rar->cstate.window_buf[
596 (rar->cstate.solid_offset + flt->block_start +
597 i++) & rar->cstate.window_mask];
598
599 /*
600 * 0xE8 = x86's call <relative_addr_uint32> (function call)
601 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump)
602 */
603 if(b == 0xE8 || (extended && b == 0xE9)) {
604
605 uint32_t addr;
606 uint32_t offset = (i + flt->block_start) % file_size;
607
608 addr = read_filter_data(rar,
609 (uint32_t)(rar->cstate.solid_offset +
610 flt->block_start + i) & rar->cstate.window_mask);
611
612 if(addr & 0x80000000) {
613 if(((addr + offset) & 0x80000000) == 0) {
614 write_filter_data(rar, (uint32_t)i,
615 addr + file_size);
616 }
617 } else {
618 if((addr - file_size) & 0x80000000) {
619 uint32_t naddr = addr - offset;
620 write_filter_data(rar, (uint32_t)i,
621 naddr);
622 }
623 }
624
625 i += 4;
626 }
627 }
628
629 return ARCHIVE_OK;
630 }
631
run_arm_filter(struct rar5 * rar,struct filter_info * flt)632 static int run_arm_filter(struct rar5* rar, struct filter_info* flt) {
633 ssize_t i = 0;
634 uint32_t offset;
635
636 circular_memcpy(rar->cstate.filtered_buf,
637 rar->cstate.window_buf, rar->cstate.window_mask,
638 rar->cstate.solid_offset + flt->block_start,
639 rar->cstate.solid_offset + flt->block_start + flt->block_length);
640
641 for(i = 0; i < flt->block_length - 3; i += 4) {
642 uint8_t* b = &rar->cstate.window_buf[
643 (rar->cstate.solid_offset +
644 flt->block_start + i + 3) & rar->cstate.window_mask];
645
646 if(*b == 0xEB) {
647 /* 0xEB = ARM's BL (branch + link) instruction. */
648 offset = read_filter_data(rar,
649 (rar->cstate.solid_offset + flt->block_start + i) &
650 (uint32_t)rar->cstate.window_mask) & 0x00ffffff;
651
652 offset -= (uint32_t) ((i + flt->block_start) / 4);
653 offset = (offset & 0x00ffffff) | 0xeb000000;
654 write_filter_data(rar, (uint32_t)i, offset);
655 }
656 }
657
658 return ARCHIVE_OK;
659 }
660
run_filter(struct archive_read * a,struct filter_info * flt)661 static int run_filter(struct archive_read* a, struct filter_info* flt) {
662 int ret;
663 struct rar5* rar = get_context(a);
664
665 clear_data_ready_stack(rar);
666 free(rar->cstate.filtered_buf);
667
668 rar->cstate.filtered_buf = malloc(flt->block_length);
669 if(!rar->cstate.filtered_buf) {
670 archive_set_error(&a->archive, ENOMEM,
671 "Can't allocate memory for filter data.");
672 return ARCHIVE_FATAL;
673 }
674
675 switch(flt->type) {
676 case FILTER_DELTA:
677 ret = run_delta_filter(rar, flt);
678 break;
679
680 case FILTER_E8:
681 /* fallthrough */
682 case FILTER_E8E9:
683 ret = run_e8e9_filter(rar, flt,
684 flt->type == FILTER_E8E9);
685 break;
686
687 case FILTER_ARM:
688 ret = run_arm_filter(rar, flt);
689 break;
690
691 default:
692 archive_set_error(&a->archive,
693 ARCHIVE_ERRNO_FILE_FORMAT,
694 "Unsupported filter type: 0x%x", flt->type);
695 return ARCHIVE_FATAL;
696 }
697
698 if(ret != ARCHIVE_OK) {
699 /* Filter has failed. */
700 return ret;
701 }
702
703 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf,
704 flt->block_length, rar->cstate.last_write_ptr))
705 {
706 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
707 "Stack overflow when submitting unpacked data");
708
709 return ARCHIVE_FATAL;
710 }
711
712 rar->cstate.last_write_ptr += flt->block_length;
713 return ARCHIVE_OK;
714 }
715
716 /* The `push_data` function submits the selected data range to the user.
717 * Next call of `use_data` will use the pointer, size and offset arguments
718 * that are specified here. These arguments are pushed to the FIFO stack here,
719 * and popped from the stack by the `use_data` function. */
push_data(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,int64_t idx_begin,int64_t idx_end)720 static void push_data(struct archive_read* a, struct rar5* rar,
721 const uint8_t* buf, int64_t idx_begin, int64_t idx_end)
722 {
723 const ssize_t wmask = rar->cstate.window_mask;
724 const ssize_t solid_write_ptr = (rar->cstate.solid_offset +
725 rar->cstate.last_write_ptr) & wmask;
726
727 idx_begin += rar->cstate.solid_offset;
728 idx_end += rar->cstate.solid_offset;
729
730 /* Check if our unpacked data is wrapped inside the window circular
731 * buffer. If it's not wrapped, it can be copied out by using
732 * a single memcpy, but when it's wrapped, we need to copy the first
733 * part with one memcpy, and the second part with another memcpy. */
734
735 if((idx_begin & wmask) > (idx_end & wmask)) {
736 /* The data is wrapped (begin offset sis bigger than end
737 * offset). */
738 const ssize_t frag1_size = rar->cstate.window_size -
739 (idx_begin & wmask);
740 const ssize_t frag2_size = idx_end & wmask;
741
742 /* Copy the first part of the buffer first. */
743 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size,
744 rar->cstate.last_write_ptr);
745
746 /* Copy the second part of the buffer. */
747 push_data_ready(a, rar, buf, frag2_size,
748 rar->cstate.last_write_ptr + frag1_size);
749
750 rar->cstate.last_write_ptr += frag1_size + frag2_size;
751 } else {
752 /* Data is not wrapped, so we can just use one call to copy the
753 * data. */
754 push_data_ready(a, rar,
755 buf + solid_write_ptr, (idx_end - idx_begin) & wmask,
756 rar->cstate.last_write_ptr);
757
758 rar->cstate.last_write_ptr += idx_end - idx_begin;
759 }
760 }
761
762 /* Convenience function that submits the data to the user. It uses the
763 * unpack window buffer as a source location. */
push_window_data(struct archive_read * a,struct rar5 * rar,int64_t idx_begin,int64_t idx_end)764 static void push_window_data(struct archive_read* a, struct rar5* rar,
765 int64_t idx_begin, int64_t idx_end)
766 {
767 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end);
768 }
769
apply_filters(struct archive_read * a)770 static int apply_filters(struct archive_read* a) {
771 struct filter_info* flt;
772 struct rar5* rar = get_context(a);
773 int ret;
774
775 rar->cstate.all_filters_applied = 0;
776
777 /* Get the first filter that can be applied to our data. The data
778 * needs to be fully unpacked before the filter can be run. */
779 if(CDE_OK == cdeque_front(&rar->cstate.filters,
780 cdeque_filter_p(&flt))) {
781 /* Check if our unpacked data fully covers this filter's
782 * range. */
783 if(rar->cstate.write_ptr > flt->block_start &&
784 rar->cstate.write_ptr >= flt->block_start +
785 flt->block_length) {
786 /* Check if we have some data pending to be written
787 * right before the filter's start offset. */
788 if(rar->cstate.last_write_ptr == flt->block_start) {
789 /* Run the filter specified by descriptor
790 * `flt`. */
791 ret = run_filter(a, flt);
792 if(ret != ARCHIVE_OK) {
793 /* Filter failure, return error. */
794 return ret;
795 }
796
797 /* Filter descriptor won't be needed anymore
798 * after it's used, * so remove it from the
799 * filter list and free its memory. */
800 (void) cdeque_pop_front(&rar->cstate.filters,
801 cdeque_filter_p(&flt));
802
803 free(flt);
804 } else {
805 /* We can't run filters yet, dump the memory
806 * right before the filter. */
807 push_window_data(a, rar,
808 rar->cstate.last_write_ptr,
809 flt->block_start);
810 }
811
812 /* Return 'filter applied or not needed' state to the
813 * caller. */
814 return ARCHIVE_RETRY;
815 }
816 }
817
818 rar->cstate.all_filters_applied = 1;
819 return ARCHIVE_OK;
820 }
821
dist_cache_push(struct rar5 * rar,int value)822 static void dist_cache_push(struct rar5* rar, int value) {
823 int* q = rar->cstate.dist_cache;
824
825 q[3] = q[2];
826 q[2] = q[1];
827 q[1] = q[0];
828 q[0] = value;
829 }
830
dist_cache_touch(struct rar5 * rar,int idx)831 static int dist_cache_touch(struct rar5* rar, int idx) {
832 int* q = rar->cstate.dist_cache;
833 int i, dist = q[idx];
834
835 for(i = idx; i > 0; i--)
836 q[i] = q[i - 1];
837
838 q[0] = dist;
839 return dist;
840 }
841
free_filters(struct rar5 * rar)842 static void free_filters(struct rar5* rar) {
843 struct cdeque* d = &rar->cstate.filters;
844
845 /* Free any remaining filters. All filters should be naturally
846 * consumed by the unpacking function, so remaining filters after
847 * unpacking normally mean that unpacking wasn't successful.
848 * But still of course we shouldn't leak memory in such case. */
849
850 /* cdeque_size() is a fast operation, so we can use it as a loop
851 * expression. */
852 while(cdeque_size(d) > 0) {
853 struct filter_info* f = NULL;
854
855 /* Pop_front will also decrease the collection's size. */
856 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)))
857 free(f);
858 }
859
860 cdeque_clear(d);
861
862 /* Also clear out the variables needed for sanity checking. */
863 rar->cstate.last_block_start = 0;
864 rar->cstate.last_block_length = 0;
865 }
866
reset_file_context(struct rar5 * rar)867 static void reset_file_context(struct rar5* rar) {
868 memset(&rar->file, 0, sizeof(rar->file));
869 blake2sp_init(&rar->file.b2state, 32);
870
871 if(rar->main.solid) {
872 rar->cstate.solid_offset += rar->cstate.write_ptr;
873 } else {
874 rar->cstate.solid_offset = 0;
875 }
876
877 rar->cstate.write_ptr = 0;
878 rar->cstate.last_write_ptr = 0;
879 rar->cstate.last_unstore_ptr = 0;
880
881 rar->file.redir_type = REDIR_TYPE_NONE;
882 rar->file.redir_flags = 0;
883
884 free_filters(rar);
885 }
886
get_archive_read(struct archive * a,struct archive_read ** ar)887 static inline int get_archive_read(struct archive* a,
888 struct archive_read** ar)
889 {
890 *ar = (struct archive_read*) a;
891 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
892 "archive_read_support_format_rar5");
893
894 return ARCHIVE_OK;
895 }
896
/* Asks __archive_read_ahead() for `how_many` bytes and stores the returned
 * pointer in `*ptr`. No data is consumed by this function.
 *
 * Returns 1 when a non-NULL pointer was obtained. Returns 0 when `ptr` is
 * NULL or when the read-ahead returned NULL (`*ptr` is NULL then). */
static int read_ahead(struct archive_read* a, size_t how_many,
    const uint8_t** ptr)
{
	ssize_t avail = -1;

	if(ptr == NULL)
		return 0;

	*ptr = __archive_read_ahead(a, how_many, &avail);
	return *ptr != NULL;
}
911
consume(struct archive_read * a,int64_t how_many)912 static int consume(struct archive_read* a, int64_t how_many) {
913 int ret;
914
915 ret = how_many == __archive_read_consume(a, how_many)
916 ? ARCHIVE_OK
917 : ARCHIVE_FATAL;
918
919 return ret;
920 }
921
922 /**
923 * Read a RAR5 variable sized numeric value. This value will be stored in
924 * `pvalue`. The `pvalue_len` argument points to a variable that will receive
925 * the byte count that was consumed in order to decode the `pvalue` value, plus
926 * one.
927 *
928 * pvalue_len is optional and can be NULL.
929 *
930 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume
931 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len`
932 * is NULL, this consuming operation is done automatically.
933 *
934 * Returns 1 if *pvalue was successfully read.
935 * Returns 0 if there was an error. In this case, *pvalue contains an
936 * invalid value.
937 */
938
read_var(struct archive_read * a,uint64_t * pvalue,uint64_t * pvalue_len)939 static int read_var(struct archive_read* a, uint64_t* pvalue,
940 uint64_t* pvalue_len)
941 {
942 uint64_t result = 0;
943 size_t shift, i;
944 const uint8_t* p;
945 uint8_t b;
946
947 /* We will read maximum of 8 bytes. We don't have to handle the
948 * situation to read the RAR5 variable-sized value stored at the end of
949 * the file, because such situation will never happen. */
950 if(!read_ahead(a, 8, &p))
951 return 0;
952
953 for(shift = 0, i = 0; i < 8; i++, shift += 7) {
954 b = p[i];
955
956 /* Strip the MSB from the input byte and add the resulting
957 * number to the `result`. */
958 result += (b & (uint64_t)0x7F) << shift;
959
960 /* MSB set to 1 means we need to continue decoding process.
961 * MSB set to 0 means we're done.
962 *
963 * This conditional checks for the second case. */
964 if((b & 0x80) == 0) {
965 if(pvalue) {
966 *pvalue = result;
967 }
968
969 /* If the caller has passed the `pvalue_len` pointer,
970 * store the number of consumed bytes in it and do NOT
971 * consume those bytes, since the caller has all the
972 * information it needs to perform */
973 if(pvalue_len) {
974 *pvalue_len = 1 + i;
975 } else {
976 /* If the caller did not provide the
977 * `pvalue_len` pointer, it will not have the
978 * possibility to advance the file pointer,
979 * because it will not know how many bytes it
980 * needs to consume. This is why we handle
981 * such situation here automatically. */
982 if(ARCHIVE_OK != consume(a, 1 + i)) {
983 return 0;
984 }
985 }
986
987 /* End of decoding process, return success. */
988 return 1;
989 }
990 }
991
992 /* The decoded value takes the maximum number of 8 bytes.
993 * It's a maximum number of bytes, so end decoding process here
994 * even if the first bit of last byte is 1. */
995 if(pvalue) {
996 *pvalue = result;
997 }
998
999 if(pvalue_len) {
1000 *pvalue_len = 9;
1001 } else {
1002 if(ARCHIVE_OK != consume(a, 9)) {
1003 return 0;
1004 }
1005 }
1006
1007 return 1;
1008 }
1009
/*
 * size_t flavoured wrapper around read_var().
 *
 * `pvalue` and `pvalue_len` are both optional. The consume rules are the
 * same as for read_var(): bytes are consumed automatically only when
 * `pvalue_len` is NULL. `*pvalue` is written only on success;
 * `*pvalue_len`, when provided, is always written.
 *
 * Returns whatever read_var() returned (1 on success, 0 on failure).
 */
static int read_var_sized(struct archive_read* a, size_t* pvalue,
    size_t* pvalue_len)
{
	uint64_t raw_value;
	uint64_t raw_len = 0;
	const int ok = read_var(a, &raw_value, pvalue_len ? &raw_len : NULL);

	if(pvalue && ok == 1)
		*pvalue = (size_t) raw_value;

	/* Narrowing is harmless here: the length never exceeds 9. */
	if(pvalue_len)
		*pvalue_len = (size_t) raw_len;

	return ok;
}
1030
read_bits_32(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * value)1031 static int read_bits_32(struct archive_read* a, struct rar5* rar,
1032 const uint8_t* p, uint32_t* value)
1033 {
1034 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1035 archive_set_error(&a->archive,
1036 ARCHIVE_ERRNO_PROGRAMMER,
1037 "Premature end of stream during extraction of data (#1)");
1038 return ARCHIVE_FATAL;
1039 }
1040
1041 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24;
1042 bits |= p[rar->bits.in_addr + 1] << 16;
1043 bits |= p[rar->bits.in_addr + 2] << 8;
1044 bits |= p[rar->bits.in_addr + 3];
1045 bits <<= rar->bits.bit_addr;
1046 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr);
1047 *value = bits;
1048 return ARCHIVE_OK;
1049 }
1050
read_bits_16(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t * value)1051 static int read_bits_16(struct archive_read* a, struct rar5* rar,
1052 const uint8_t* p, uint16_t* value)
1053 {
1054 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1055 archive_set_error(&a->archive,
1056 ARCHIVE_ERRNO_PROGRAMMER,
1057 "Premature end of stream during extraction of data (#2)");
1058 return ARCHIVE_FATAL;
1059 }
1060
1061 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16;
1062 bits |= (int) p[rar->bits.in_addr + 1] << 8;
1063 bits |= (int) p[rar->bits.in_addr + 2];
1064 bits >>= (8 - rar->bits.bit_addr);
1065 *value = bits & 0xffff;
1066 return ARCHIVE_OK;
1067 }
1068
skip_bits(struct rar5 * rar,int bits)1069 static void skip_bits(struct rar5* rar, int bits) {
1070 const int new_bits = rar->bits.bit_addr + bits;
1071 rar->bits.in_addr += new_bits >> 3;
1072 rar->bits.bit_addr = new_bits & 7;
1073 }
1074
1075 /* n = up to 16 */
read_consume_bits(struct archive_read * a,struct rar5 * rar,const uint8_t * p,int n,int * value)1076 static int read_consume_bits(struct archive_read* a, struct rar5* rar,
1077 const uint8_t* p, int n, int* value)
1078 {
1079 uint16_t v;
1080 int ret, num;
1081
1082 if(n == 0 || n > 16) {
1083 /* This is a programmer error and should never happen
1084 * in runtime. */
1085 return ARCHIVE_FATAL;
1086 }
1087
1088 ret = read_bits_16(a, rar, p, &v);
1089 if(ret != ARCHIVE_OK)
1090 return ret;
1091
1092 num = (int) v;
1093 num >>= 16 - n;
1094
1095 skip_bits(rar, n);
1096
1097 if(value)
1098 *value = num;
1099
1100 return ARCHIVE_OK;
1101 }
1102
read_u32(struct archive_read * a,uint32_t * pvalue)1103 static int read_u32(struct archive_read* a, uint32_t* pvalue) {
1104 const uint8_t* p;
1105 if(!read_ahead(a, 4, &p))
1106 return 0;
1107
1108 *pvalue = archive_le32dec(p);
1109 return ARCHIVE_OK == consume(a, 4) ? 1 : 0;
1110 }
1111
read_u64(struct archive_read * a,uint64_t * pvalue)1112 static int read_u64(struct archive_read* a, uint64_t* pvalue) {
1113 const uint8_t* p;
1114 if(!read_ahead(a, 8, &p))
1115 return 0;
1116
1117 *pvalue = archive_le64dec(p);
1118 return ARCHIVE_OK == consume(a, 8) ? 1 : 0;
1119 }
1120
bid_standard(struct archive_read * a)1121 static int bid_standard(struct archive_read* a) {
1122 const uint8_t* p;
1123 char signature[sizeof(rar5_signature_xor)];
1124
1125 rar5_signature(signature);
1126
1127 if(!read_ahead(a, sizeof(rar5_signature_xor), &p))
1128 return -1;
1129
1130 if(!memcmp(signature, p, sizeof(rar5_signature_xor)))
1131 return 30;
1132
1133 return -1;
1134 }
1135
bid_sfx(struct archive_read * a)1136 static int bid_sfx(struct archive_read *a)
1137 {
1138 const char *p;
1139
1140 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
1141 return -1;
1142
1143 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) {
1144 /* This is a PE file */
1145 char signature[sizeof(rar5_signature_xor)];
1146 ssize_t offset = 0x10000;
1147 ssize_t window = 4096;
1148 ssize_t bytes_avail;
1149
1150 rar5_signature(signature);
1151
1152 while (offset + window <= (1024 * 512)) {
1153 const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail);
1154 if (buff == NULL) {
1155 /* Remaining bytes are less than window. */
1156 window >>= 1;
1157 if (window < 0x40)
1158 return 0;
1159 continue;
1160 }
1161 p = buff + offset;
1162 while (p + 8 < buff + bytes_avail) {
1163 if (memcmp(p, signature, sizeof(signature)) == 0)
1164 return 30;
1165 p += 0x10;
1166 }
1167 offset = p - buff;
1168 }
1169 }
1170
1171 return 0;
1172 }
1173
/*
 * Format bidder. Declines (-1) when another format already bid above 30;
 * otherwise tries the plain signature first and the self-extracting
 * layout second, returning the first non-negative bid or -1.
 */
static int rar5_bid(struct archive_read* a, int best_bid) {
	int bid;

	if(best_bid > 30)
		return -1;

	bid = bid_standard(a);
	if(bid < 0)
		bid = bid_sfx(a);

	return (bid > -1) ? bid : -1;
}
1191
rar5_options(struct archive_read * a,const char * key,const char * val)1192 static int rar5_options(struct archive_read *a, const char *key,
1193 const char *val) {
1194 (void) a;
1195 (void) key;
1196 (void) val;
1197
1198 /* No options supported in this version. Return the ARCHIVE_WARN code
1199 * to signal the options supervisor that the unpacker didn't handle
1200 * setting this option. */
1201
1202 return ARCHIVE_WARN;
1203 }
1204
init_header(struct archive_read * a)1205 static void init_header(struct archive_read* a) {
1206 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5;
1207 a->archive.archive_format_name = "RAR5";
1208 }
1209
init_window_mask(struct rar5 * rar)1210 static void init_window_mask(struct rar5* rar) {
1211 if (rar->cstate.window_size)
1212 rar->cstate.window_mask = rar->cstate.window_size - 1;
1213 else
1214 rar->cstate.window_mask = 0;
1215 }
1216
/*
 * Bit flags found in a block header's flags field; the header parsers
 * test them against their `block_flags` argument.
 */
enum HEADER_FLAGS {
	HFL_EXTRA_DATA      = 1 << 0,	/* 0x0001 */
	HFL_DATA            = 1 << 1,	/* 0x0002 */
	HFL_SKIP_IF_UNKNOWN = 1 << 2,	/* 0x0004 */
	HFL_SPLIT_BEFORE    = 1 << 3,	/* 0x0008 */
	HFL_SPLIT_AFTER     = 1 << 4,	/* 0x0010 */
	HFL_CHILD           = 1 << 5,	/* 0x0020 */
	HFL_INHERITED       = 1 << 6	/* 0x0040 */
};
1226
process_main_locator_extra_block(struct archive_read * a,struct rar5 * rar)1227 static int process_main_locator_extra_block(struct archive_read* a,
1228 struct rar5* rar)
1229 {
1230 uint64_t locator_flags;
1231
1232 enum LOCATOR_FLAGS {
1233 QLIST = 0x01, RECOVERY = 0x02,
1234 };
1235
1236 if(!read_var(a, &locator_flags, NULL)) {
1237 return ARCHIVE_EOF;
1238 }
1239
1240 if(locator_flags & QLIST) {
1241 if(!read_var(a, &rar->qlist_offset, NULL)) {
1242 return ARCHIVE_EOF;
1243 }
1244
1245 /* qlist is not used */
1246 }
1247
1248 if(locator_flags & RECOVERY) {
1249 if(!read_var(a, &rar->rr_offset, NULL)) {
1250 return ARCHIVE_EOF;
1251 }
1252
1253 /* rr is not used */
1254 }
1255
1256 return ARCHIVE_OK;
1257 }
1258
parse_file_extra_hash(struct archive_read * a,struct rar5 * rar,int64_t * extra_data_size)1259 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar,
1260 int64_t* extra_data_size)
1261 {
1262 size_t hash_type = 0;
1263 size_t value_len;
1264
1265 enum HASH_TYPE {
1266 BLAKE2sp = 0x00
1267 };
1268
1269 if(!read_var_sized(a, &hash_type, &value_len))
1270 return ARCHIVE_EOF;
1271
1272 *extra_data_size -= value_len;
1273 if(ARCHIVE_OK != consume(a, value_len)) {
1274 return ARCHIVE_EOF;
1275 }
1276
1277 /* The file uses BLAKE2sp checksum algorithm instead of plain old
1278 * CRC32. */
1279 if(hash_type == BLAKE2sp) {
1280 const uint8_t* p;
1281 const int hash_size = sizeof(rar->file.blake2sp);
1282
1283 if(!read_ahead(a, hash_size, &p))
1284 return ARCHIVE_EOF;
1285
1286 rar->file.has_blake2 = 1;
1287 memcpy(&rar->file.blake2sp, p, hash_size);
1288
1289 if(ARCHIVE_OK != consume(a, hash_size)) {
1290 return ARCHIVE_EOF;
1291 }
1292
1293 *extra_data_size -= hash_size;
1294 } else {
1295 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1296 "Unsupported hash type (0x%x)", (int) hash_type);
1297 return ARCHIVE_FATAL;
1298 }
1299
1300 return ARCHIVE_OK;
1301 }
1302
/*
 * Convert a Windows timestamp to seconds since the Unix epoch.
 *
 * The input is divided by 10,000,000 (ticks per second; the remainder is
 * discarded) and 11644473600 seconds are subtracted. The arithmetic is
 * unsigned, so inputs that map before the Unix epoch wrap around.
 */
static uint64_t time_win_to_unix(uint64_t win_time) {
	const uint64_t epoch_delta_sec = 11644473600LL;
	const size_t ticks_per_sec = 10000000;
	const uint64_t whole_seconds = win_time / ticks_per_sec;

	return whole_seconds - epoch_delta_sec;
}
1308
parse_htime_item(struct archive_read * a,char unix_time,uint64_t * where,int64_t * extra_data_size)1309 static int parse_htime_item(struct archive_read* a, char unix_time,
1310 uint64_t* where, int64_t* extra_data_size)
1311 {
1312 if(unix_time) {
1313 uint32_t time_val;
1314 if(!read_u32(a, &time_val))
1315 return ARCHIVE_EOF;
1316
1317 *extra_data_size -= 4;
1318 *where = (uint64_t) time_val;
1319 } else {
1320 uint64_t windows_time;
1321 if(!read_u64(a, &windows_time))
1322 return ARCHIVE_EOF;
1323
1324 *where = time_win_to_unix(windows_time);
1325 *extra_data_size -= 8;
1326 }
1327
1328 return ARCHIVE_OK;
1329 }
1330
parse_file_extra_version(struct archive_read * a,struct archive_entry * e,int64_t * extra_data_size)1331 static int parse_file_extra_version(struct archive_read* a,
1332 struct archive_entry* e, int64_t* extra_data_size)
1333 {
1334 size_t flags = 0;
1335 size_t version = 0;
1336 size_t value_len = 0;
1337 struct archive_string version_string;
1338 struct archive_string name_utf8_string;
1339 const char* cur_filename;
1340
1341 /* Flags are ignored. */
1342 if(!read_var_sized(a, &flags, &value_len))
1343 return ARCHIVE_EOF;
1344
1345 *extra_data_size -= value_len;
1346 if(ARCHIVE_OK != consume(a, value_len))
1347 return ARCHIVE_EOF;
1348
1349 if(!read_var_sized(a, &version, &value_len))
1350 return ARCHIVE_EOF;
1351
1352 *extra_data_size -= value_len;
1353 if(ARCHIVE_OK != consume(a, value_len))
1354 return ARCHIVE_EOF;
1355
1356 /* extra_data_size should be zero here. */
1357
1358 cur_filename = archive_entry_pathname_utf8(e);
1359 if(cur_filename == NULL) {
1360 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1361 "Version entry without file name");
1362 return ARCHIVE_FATAL;
1363 }
1364
1365 archive_string_init(&version_string);
1366 archive_string_init(&name_utf8_string);
1367
1368 /* Prepare a ;123 suffix for the filename, where '123' is the version
1369 * value of this file. */
1370 archive_string_sprintf(&version_string, ";%zu", version);
1371
1372 /* Build the new filename. */
1373 archive_strcat(&name_utf8_string, cur_filename);
1374 archive_strcat(&name_utf8_string, version_string.s);
1375
1376 /* Apply the new filename into this file's context. */
1377 archive_entry_update_pathname_utf8(e, name_utf8_string.s);
1378
1379 /* Free buffers. */
1380 archive_string_free(&version_string);
1381 archive_string_free(&name_utf8_string);
1382 return ARCHIVE_OK;
1383 }
1384
parse_file_extra_htime(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t * extra_data_size)1385 static int parse_file_extra_htime(struct archive_read* a,
1386 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
1387 {
1388 char unix_time = 0;
1389 size_t flags = 0;
1390 size_t value_len;
1391
1392 enum HTIME_FLAGS {
1393 IS_UNIX = 0x01,
1394 HAS_MTIME = 0x02,
1395 HAS_CTIME = 0x04,
1396 HAS_ATIME = 0x08,
1397 HAS_UNIX_NS = 0x10,
1398 };
1399
1400 if(!read_var_sized(a, &flags, &value_len))
1401 return ARCHIVE_EOF;
1402
1403 *extra_data_size -= value_len;
1404 if(ARCHIVE_OK != consume(a, value_len)) {
1405 return ARCHIVE_EOF;
1406 }
1407
1408 unix_time = flags & IS_UNIX;
1409
1410 if(flags & HAS_MTIME) {
1411 parse_htime_item(a, unix_time, &rar->file.e_mtime,
1412 extra_data_size);
1413 archive_entry_set_mtime(e, rar->file.e_mtime, 0);
1414 }
1415
1416 if(flags & HAS_CTIME) {
1417 parse_htime_item(a, unix_time, &rar->file.e_ctime,
1418 extra_data_size);
1419 archive_entry_set_ctime(e, rar->file.e_ctime, 0);
1420 }
1421
1422 if(flags & HAS_ATIME) {
1423 parse_htime_item(a, unix_time, &rar->file.e_atime,
1424 extra_data_size);
1425 archive_entry_set_atime(e, rar->file.e_atime, 0);
1426 }
1427
1428 if(flags & HAS_UNIX_NS) {
1429 if(!read_u32(a, &rar->file.e_unix_ns))
1430 return ARCHIVE_EOF;
1431
1432 *extra_data_size -= 4;
1433 }
1434
1435 return ARCHIVE_OK;
1436 }
1437
parse_file_extra_redir(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t * extra_data_size)1438 static int parse_file_extra_redir(struct archive_read* a,
1439 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
1440 {
1441 uint64_t value_size = 0;
1442 size_t target_size = 0;
1443 char target_utf8_buf[MAX_NAME_IN_BYTES];
1444 const uint8_t* p;
1445
1446 if(!read_var(a, &rar->file.redir_type, &value_size))
1447 return ARCHIVE_EOF;
1448 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1449 return ARCHIVE_EOF;
1450 *extra_data_size -= value_size;
1451
1452 if(!read_var(a, &rar->file.redir_flags, &value_size))
1453 return ARCHIVE_EOF;
1454 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1455 return ARCHIVE_EOF;
1456 *extra_data_size -= value_size;
1457
1458 if(!read_var_sized(a, &target_size, NULL))
1459 return ARCHIVE_EOF;
1460 *extra_data_size -= target_size + 1;
1461
1462 if(target_size > (MAX_NAME_IN_CHARS - 1)) {
1463 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1464 "Link target is too long");
1465 return ARCHIVE_FATAL;
1466 }
1467
1468 if(target_size == 0) {
1469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1470 "No link target specified");
1471 return ARCHIVE_FATAL;
1472 }
1473
1474 if(!read_ahead(a, target_size, &p))
1475 return ARCHIVE_EOF;
1476
1477 memcpy(target_utf8_buf, p, target_size);
1478 target_utf8_buf[target_size] = 0;
1479
1480 if(ARCHIVE_OK != consume(a, (int64_t)target_size))
1481 return ARCHIVE_EOF;
1482
1483 switch(rar->file.redir_type) {
1484 case REDIR_TYPE_UNIXSYMLINK:
1485 case REDIR_TYPE_WINSYMLINK:
1486 archive_entry_set_filetype(e, AE_IFLNK);
1487 archive_entry_update_symlink_utf8(e, target_utf8_buf);
1488 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) {
1489 archive_entry_set_symlink_type(e,
1490 AE_SYMLINK_TYPE_DIRECTORY);
1491 } else {
1492 archive_entry_set_symlink_type(e,
1493 AE_SYMLINK_TYPE_FILE);
1494 }
1495 break;
1496
1497 case REDIR_TYPE_HARDLINK:
1498 archive_entry_set_filetype(e, AE_IFREG);
1499 archive_entry_update_hardlink_utf8(e, target_utf8_buf);
1500 break;
1501
1502 default:
1503 /* Unknown redir type, skip it. */
1504 break;
1505 }
1506 return ARCHIVE_OK;
1507 }
1508
parse_file_extra_owner(struct archive_read * a,struct archive_entry * e,int64_t * extra_data_size)1509 static int parse_file_extra_owner(struct archive_read* a,
1510 struct archive_entry* e, int64_t* extra_data_size)
1511 {
1512 uint64_t flags = 0;
1513 uint64_t value_size = 0;
1514 uint64_t id = 0;
1515 size_t name_len = 0;
1516 size_t name_size = 0;
1517 char namebuf[OWNER_MAXNAMELEN];
1518 const uint8_t* p;
1519
1520 if(!read_var(a, &flags, &value_size))
1521 return ARCHIVE_EOF;
1522 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1523 return ARCHIVE_EOF;
1524 *extra_data_size -= value_size;
1525
1526 if ((flags & OWNER_USER_NAME) != 0) {
1527 if(!read_var_sized(a, &name_size, NULL))
1528 return ARCHIVE_EOF;
1529 *extra_data_size -= name_size + 1;
1530
1531 if(!read_ahead(a, name_size, &p))
1532 return ARCHIVE_EOF;
1533
1534 if (name_size >= OWNER_MAXNAMELEN) {
1535 name_len = OWNER_MAXNAMELEN - 1;
1536 } else {
1537 name_len = name_size;
1538 }
1539
1540 memcpy(namebuf, p, name_len);
1541 namebuf[name_len] = 0;
1542 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1543 return ARCHIVE_EOF;
1544
1545 archive_entry_set_uname(e, namebuf);
1546 }
1547 if ((flags & OWNER_GROUP_NAME) != 0) {
1548 if(!read_var_sized(a, &name_size, NULL))
1549 return ARCHIVE_EOF;
1550 *extra_data_size -= name_size + 1;
1551
1552 if(!read_ahead(a, name_size, &p))
1553 return ARCHIVE_EOF;
1554
1555 if (name_size >= OWNER_MAXNAMELEN) {
1556 name_len = OWNER_MAXNAMELEN - 1;
1557 } else {
1558 name_len = name_size;
1559 }
1560
1561 memcpy(namebuf, p, name_len);
1562 namebuf[name_len] = 0;
1563 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1564 return ARCHIVE_EOF;
1565
1566 archive_entry_set_gname(e, namebuf);
1567 }
1568 if ((flags & OWNER_USER_UID) != 0) {
1569 if(!read_var(a, &id, &value_size))
1570 return ARCHIVE_EOF;
1571 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1572 return ARCHIVE_EOF;
1573 *extra_data_size -= value_size;
1574
1575 archive_entry_set_uid(e, (la_int64_t)id);
1576 }
1577 if ((flags & OWNER_GROUP_GID) != 0) {
1578 if(!read_var(a, &id, &value_size))
1579 return ARCHIVE_EOF;
1580 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1581 return ARCHIVE_EOF;
1582 *extra_data_size -= value_size;
1583
1584 archive_entry_set_gid(e, (la_int64_t)id);
1585 }
1586 return ARCHIVE_OK;
1587 }
1588
process_head_file_extra(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,int64_t extra_data_size)1589 static int process_head_file_extra(struct archive_read* a,
1590 struct archive_entry* e, struct rar5* rar, int64_t extra_data_size)
1591 {
1592 uint64_t extra_field_size;
1593 uint64_t extra_field_id = 0;
1594 int ret = ARCHIVE_FATAL;
1595 uint64_t var_size;
1596
1597 while(extra_data_size > 0) {
1598 if(!read_var(a, &extra_field_size, &var_size))
1599 return ARCHIVE_EOF;
1600
1601 extra_data_size -= var_size;
1602 if(ARCHIVE_OK != consume(a, var_size)) {
1603 return ARCHIVE_EOF;
1604 }
1605
1606 if(!read_var(a, &extra_field_id, &var_size))
1607 return ARCHIVE_EOF;
1608
1609 extra_field_size -= var_size;
1610 extra_data_size -= var_size;
1611 if(ARCHIVE_OK != consume(a, var_size)) {
1612 return ARCHIVE_EOF;
1613 }
1614
1615 switch(extra_field_id) {
1616 case EX_HASH:
1617 ret = parse_file_extra_hash(a, rar,
1618 &extra_data_size);
1619 break;
1620 case EX_HTIME:
1621 ret = parse_file_extra_htime(a, e, rar,
1622 &extra_data_size);
1623 break;
1624 case EX_REDIR:
1625 ret = parse_file_extra_redir(a, e, rar,
1626 &extra_data_size);
1627 break;
1628 case EX_UOWNER:
1629 ret = parse_file_extra_owner(a, e,
1630 &extra_data_size);
1631 break;
1632 case EX_VERSION:
1633 ret = parse_file_extra_version(a, e,
1634 &extra_data_size);
1635 break;
1636 case EX_CRYPT:
1637 /* Mark the entry as encrypted */
1638 archive_entry_set_is_data_encrypted(e, 1);
1639 rar->has_encrypted_entries = 1;
1640 rar->cstate.data_encrypted = 1;
1641 /* fallthrough */
1642 case EX_SUBDATA:
1643 /* fallthrough */
1644 default:
1645 /* Skip unsupported entry. */
1646 extra_data_size -= extra_field_size;
1647 if (ARCHIVE_OK != consume(a, extra_field_size)) {
1648 return ARCHIVE_EOF;
1649 }
1650 }
1651 }
1652
1653 if(ret != ARCHIVE_OK) {
1654 /* Attribute not implemented. */
1655 return ret;
1656 }
1657
1658 return ARCHIVE_OK;
1659 }
1660
process_head_file(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1661 static int process_head_file(struct archive_read* a, struct rar5* rar,
1662 struct archive_entry* entry, size_t block_flags)
1663 {
1664 int64_t extra_data_size = 0;
1665 size_t data_size = 0;
1666 size_t file_flags = 0;
1667 size_t file_attr = 0;
1668 size_t compression_info = 0;
1669 size_t host_os = 0;
1670 size_t name_size = 0;
1671 uint64_t unpacked_size, window_size;
1672 uint32_t mtime = 0, crc = 0;
1673 int c_method = 0, c_version = 0;
1674 char name_utf8_buf[MAX_NAME_IN_BYTES];
1675 const uint8_t* p;
1676
1677 enum FILE_FLAGS {
1678 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004,
1679 UNKNOWN_UNPACKED_SIZE = 0x0008,
1680 };
1681
1682 enum FILE_ATTRS {
1683 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4,
1684 ATTR_DIRECTORY = 0x10,
1685 };
1686
1687 enum COMP_INFO_FLAGS {
1688 SOLID = 0x0040,
1689 };
1690
1691 enum HOST_OS {
1692 HOST_WINDOWS = 0,
1693 HOST_UNIX = 1,
1694 };
1695
1696 archive_entry_clear(entry);
1697
1698 /* Do not reset file context if we're switching archives. */
1699 if(!rar->cstate.switch_multivolume) {
1700 reset_file_context(rar);
1701 }
1702
1703 if(block_flags & HFL_EXTRA_DATA) {
1704 uint64_t edata_size = 0;
1705 if(!read_var(a, &edata_size, NULL))
1706 return ARCHIVE_EOF;
1707
1708 /* Intentional type cast from unsigned to signed. */
1709 extra_data_size = (int64_t) edata_size;
1710 }
1711
1712 if(block_flags & HFL_DATA) {
1713 if(!read_var_sized(a, &data_size, NULL))
1714 return ARCHIVE_EOF;
1715
1716 rar->file.bytes_remaining = data_size;
1717 } else {
1718 rar->file.bytes_remaining = 0;
1719
1720 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1721 "no data found in file/service block");
1722 return ARCHIVE_FATAL;
1723 }
1724
1725 if(!read_var_sized(a, &file_flags, NULL))
1726 return ARCHIVE_EOF;
1727
1728 if(!read_var(a, &unpacked_size, NULL))
1729 return ARCHIVE_EOF;
1730
1731 if(file_flags & UNKNOWN_UNPACKED_SIZE) {
1732 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1733 "Files with unknown unpacked size are not supported");
1734 return ARCHIVE_FATAL;
1735 }
1736
1737 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);
1738
1739 if(!read_var_sized(a, &file_attr, NULL))
1740 return ARCHIVE_EOF;
1741
1742 if(file_flags & UTIME) {
1743 if(!read_u32(a, &mtime))
1744 return ARCHIVE_EOF;
1745 }
1746
1747 if(file_flags & CRC32) {
1748 if(!read_u32(a, &crc))
1749 return ARCHIVE_EOF;
1750 }
1751
1752 if(!read_var_sized(a, &compression_info, NULL))
1753 return ARCHIVE_EOF;
1754
1755 c_method = (int) (compression_info >> 7) & 0x7;
1756 c_version = (int) (compression_info & 0x3f);
1757
1758 /* RAR5 seems to limit the dictionary size to 64MB. */
1759 window_size = (rar->file.dir > 0) ?
1760 0 :
1761 g_unpack_window_size << ((compression_info >> 10) & 15);
1762 rar->cstate.method = c_method;
1763 rar->cstate.version = c_version + 50;
1764 rar->file.solid = (compression_info & SOLID) > 0;
1765
1766 /* Archives which declare solid files without initializing the window
1767 * buffer first are invalid, unless previous data was encrypted, in
1768 * which case we may never have had the chance */
1769
1770 if(rar->file.solid > 0 && rar->cstate.data_encrypted == 0 &&
1771 rar->cstate.window_buf == NULL) {
1772 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1773 "Declared solid file, but no window buffer "
1774 "initialized yet.");
1775 return ARCHIVE_FATAL;
1776 }
1777
1778 /* Check if window_size is a sane value. Also, if the file is not
1779 * declared as a directory, disallow window_size == 0. */
1780 if(window_size > (64 * 1024 * 1024) ||
1781 (rar->file.dir == 0 && window_size == 0))
1782 {
1783 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1784 "Declared dictionary size is not supported.");
1785 return ARCHIVE_FATAL;
1786 }
1787
1788 if(rar->file.solid > 0) {
1789 /* Re-check if current window size is the same as previous
1790 * window size (for solid files only). */
1791 if(rar->file.solid_window_size > 0 &&
1792 rar->file.solid_window_size != (ssize_t) window_size)
1793 {
1794 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1795 "Window size for this solid file doesn't match "
1796 "the window size used in previous solid file. ");
1797 return ARCHIVE_FATAL;
1798 }
1799 }
1800 else
1801 rar->cstate.data_encrypted = 0; /* Reset for new buffer */
1802
1803 if(rar->cstate.window_size < (ssize_t) window_size &&
1804 rar->cstate.window_buf)
1805 {
1806 /* The `data_ready` stack contains pointers to the `window_buf` or
1807 * `filtered_buf` buffers. Since we're about to reallocate the first
1808 * buffer, some of those pointers could become invalid. Therefore, we
1809 * need to dispose of all entries from the stack before attempting the
1810 * realloc. */
1811 clear_data_ready_stack(rar);
1812
1813 /* If window_buf has been allocated before, reallocate it, so
1814 * that its size will match new window_size. */
1815
1816 uint8_t* new_window_buf =
1817 realloc(rar->cstate.window_buf, (size_t) window_size);
1818
1819 if(!new_window_buf) {
1820 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1821 "Not enough memory when trying to realloc the window "
1822 "buffer.");
1823 return ARCHIVE_FATAL;
1824 }
1825
1826 rar->cstate.window_buf = new_window_buf;
1827 }
1828
1829 /* Values up to 64M should fit into ssize_t on every
1830 * architecture. */
1831 rar->cstate.window_size = (ssize_t) window_size;
1832
1833 if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
1834 /* Solid files have to have the same window_size across
1835 whole archive. Remember the window_size parameter
1836 for first solid file found. */
1837 rar->file.solid_window_size = rar->cstate.window_size;
1838 }
1839
1840 init_window_mask(rar);
1841
1842 rar->file.service = 0;
1843
1844 if(!read_var_sized(a, &host_os, NULL))
1845 return ARCHIVE_EOF;
1846
1847 if(host_os == HOST_WINDOWS) {
1848 /* Host OS is Windows */
1849
1850 __LA_MODE_T mode;
1851
1852 if(file_attr & ATTR_DIRECTORY) {
1853 if (file_attr & ATTR_READONLY) {
1854 mode = 0555 | AE_IFDIR;
1855 } else {
1856 mode = 0755 | AE_IFDIR;
1857 }
1858 } else {
1859 if (file_attr & ATTR_READONLY) {
1860 mode = 0444 | AE_IFREG;
1861 } else {
1862 mode = 0644 | AE_IFREG;
1863 }
1864 }
1865
1866 archive_entry_set_mode(entry, mode);
1867
1868 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
1869 char *fflags_text, *ptr;
1870 /* allocate for ",rdonly,hidden,system" */
1871 fflags_text = malloc(22 * sizeof(*fflags_text));
1872 if (fflags_text != NULL) {
1873 ptr = fflags_text;
1874 if (file_attr & ATTR_READONLY) {
1875 strcpy(ptr, ",rdonly");
1876 ptr = ptr + 7;
1877 }
1878 if (file_attr & ATTR_HIDDEN) {
1879 strcpy(ptr, ",hidden");
1880 ptr = ptr + 7;
1881 }
1882 if (file_attr & ATTR_SYSTEM) {
1883 strcpy(ptr, ",system");
1884 ptr = ptr + 7;
1885 }
1886 if (ptr > fflags_text) {
1887 archive_entry_copy_fflags_text(entry,
1888 fflags_text + 1);
1889 }
1890 free(fflags_text);
1891 }
1892 }
1893 } else if(host_os == HOST_UNIX) {
1894 /* Host OS is Unix */
1895 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
1896 } else {
1897 /* Unknown host OS */
1898 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1899 "Unsupported Host OS: 0x%x", (int) host_os);
1900
1901 return ARCHIVE_FATAL;
1902 }
1903
1904 if(!read_var_sized(a, &name_size, NULL))
1905 return ARCHIVE_EOF;
1906
1907 if(name_size > (MAX_NAME_IN_CHARS - 1)) {
1908 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1909 "Filename is too long");
1910
1911 return ARCHIVE_FATAL;
1912 }
1913
1914 if(name_size == 0) {
1915 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1916 "No filename specified");
1917
1918 return ARCHIVE_FATAL;
1919 }
1920
1921 if(!read_ahead(a, name_size, &p))
1922 return ARCHIVE_EOF;
1923
1924 memcpy(name_utf8_buf, p, name_size);
1925 name_utf8_buf[name_size] = 0;
1926 if(ARCHIVE_OK != consume(a, name_size)) {
1927 return ARCHIVE_EOF;
1928 }
1929
1930 archive_entry_update_pathname_utf8(entry, name_utf8_buf);
1931
1932 if(extra_data_size > 0) {
1933 int ret = process_head_file_extra(a, entry, rar,
1934 extra_data_size);
1935
1936 /*
1937 * TODO: rewrite or remove useless sanity check
1938 * as extra_data_size is not passed as a pointer
1939 *
1940 if(extra_data_size < 0) {
1941 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1942 "File extra data size is not zero");
1943 return ARCHIVE_FATAL;
1944 }
1945 */
1946
1947 if(ret != ARCHIVE_OK)
1948 return ret;
1949 }
1950
1951 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
1952 rar->file.unpacked_size = (ssize_t) unpacked_size;
1953 if(rar->file.redir_type == REDIR_TYPE_NONE)
1954 archive_entry_set_size(entry, unpacked_size);
1955 }
1956
1957 if(file_flags & UTIME) {
1958 archive_entry_set_mtime(entry, (time_t) mtime, 0);
1959 }
1960
1961 if(file_flags & CRC32) {
1962 rar->file.stored_crc32 = crc;
1963 }
1964
1965 if(!rar->cstate.switch_multivolume) {
1966 /* Do not reinitialize unpacking state if we're switching
1967 * archives. */
1968 rar->cstate.block_parsing_finished = 1;
1969 rar->cstate.all_filters_applied = 1;
1970 rar->cstate.initialized = 0;
1971 }
1972
1973 if(rar->generic.split_before > 0) {
1974 /* If now we're standing on a header that has a 'split before'
1975 * mark, it means we're standing on a 'continuation' file
1976 * header. Signal the caller that if it wants to move to
1977 * another file, it must call rar5_read_header() function
1978 * again. */
1979
1980 return ARCHIVE_RETRY;
1981 } else {
1982 return ARCHIVE_OK;
1983 }
1984 }
1985
process_head_service(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1986 static int process_head_service(struct archive_read* a, struct rar5* rar,
1987 struct archive_entry* entry, size_t block_flags)
1988 {
1989 /* Process this SERVICE block the same way as FILE blocks. */
1990 int ret = process_head_file(a, rar, entry, block_flags);
1991 if(ret != ARCHIVE_OK)
1992 return ret;
1993
1994 rar->file.service = 1;
1995
1996 /* But skip the data part automatically. It's no use for the user
1997 * anyway. It contains only service data, not even needed to
1998 * properly unpack the file. */
1999 ret = rar5_read_data_skip(a);
2000 if(ret != ARCHIVE_OK)
2001 return ret;
2002
2003 /* After skipping, try parsing another block automatically. */
2004 return ARCHIVE_RETRY;
2005 }
2006
process_head_main(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)2007 static int process_head_main(struct archive_read* a, struct rar5* rar,
2008 struct archive_entry* entry, size_t block_flags)
2009 {
2010 int ret;
2011 uint64_t extra_data_size = 0;
2012 size_t extra_field_size = 0;
2013 size_t extra_field_id = 0;
2014 size_t archive_flags = 0;
2015
2016 enum MAIN_FLAGS {
2017 VOLUME = 0x0001, /* multi-volume archive */
2018 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't
2019 * have it */
2020 SOLID = 0x0004, /* solid archive */
2021 PROTECT = 0x0008, /* contains Recovery info */
2022 LOCK = 0x0010, /* readonly flag, not used */
2023 };
2024
2025 enum MAIN_EXTRA {
2026 // Just one attribute here.
2027 LOCATOR = 0x01,
2028 };
2029
2030 (void) entry;
2031
2032 if(block_flags & HFL_EXTRA_DATA) {
2033 if(!read_var(a, &extra_data_size, NULL))
2034 return ARCHIVE_EOF;
2035 } else {
2036 extra_data_size = 0;
2037 }
2038
2039 if(!read_var_sized(a, &archive_flags, NULL)) {
2040 return ARCHIVE_EOF;
2041 }
2042
2043 rar->main.volume = (archive_flags & VOLUME) > 0;
2044 rar->main.solid = (archive_flags & SOLID) > 0;
2045
2046 if(archive_flags & VOLUME_NUMBER) {
2047 size_t v = 0;
2048 if(!read_var_sized(a, &v, NULL)) {
2049 return ARCHIVE_EOF;
2050 }
2051
2052 if (v > UINT_MAX) {
2053 archive_set_error(&a->archive,
2054 ARCHIVE_ERRNO_FILE_FORMAT,
2055 "Invalid volume number");
2056 return ARCHIVE_FATAL;
2057 }
2058
2059 rar->main.vol_no = (unsigned int) v;
2060 } else {
2061 rar->main.vol_no = 0;
2062 }
2063
2064 if(rar->vol.expected_vol_no > 0 &&
2065 rar->main.vol_no != rar->vol.expected_vol_no)
2066 {
2067 /* Returning EOF instead of FATAL because of strange
2068 * libarchive behavior. When opening multiple files via
2069 * archive_read_open_filenames(), after reading up the whole
2070 * last file, the __archive_read_ahead function wraps up to
2071 * the first archive instead of returning EOF. */
2072 return ARCHIVE_EOF;
2073 }
2074
2075 if(extra_data_size == 0) {
2076 /* Early return. */
2077 return ARCHIVE_OK;
2078 }
2079
2080 if(!read_var_sized(a, &extra_field_size, NULL)) {
2081 return ARCHIVE_EOF;
2082 }
2083
2084 if(!read_var_sized(a, &extra_field_id, NULL)) {
2085 return ARCHIVE_EOF;
2086 }
2087
2088 if(extra_field_size == 0) {
2089 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2090 "Invalid extra field size");
2091 return ARCHIVE_FATAL;
2092 }
2093
2094 switch(extra_field_id) {
2095 case LOCATOR:
2096 ret = process_main_locator_extra_block(a, rar);
2097 if(ret != ARCHIVE_OK) {
2098 /* Error while parsing main locator extra
2099 * block. */
2100 return ret;
2101 }
2102
2103 break;
2104 default:
2105 archive_set_error(&a->archive,
2106 ARCHIVE_ERRNO_FILE_FORMAT,
2107 "Unsupported extra type (0x%x)",
2108 (int) extra_field_id);
2109 return ARCHIVE_FATAL;
2110 }
2111
2112 return ARCHIVE_OK;
2113 }
2114
skip_unprocessed_bytes(struct archive_read * a)2115 static int skip_unprocessed_bytes(struct archive_read* a) {
2116 struct rar5* rar = get_context(a);
2117 int ret;
2118
2119 if(rar->file.bytes_remaining) {
2120 /* Use different skipping method in block merging mode than in
2121 * normal mode. If merge mode is active, rar5_read_data_skip
2122 * can't be used, because it could allow recursive use of
2123 * merge_block() * function, and this function doesn't support
2124 * recursive use. */
2125 if(rar->merge_mode) {
2126 /* Discard whole merged block. This is valid in solid
2127 * mode as well, because the code will discard blocks
2128 * only if those blocks are safe to discard (i.e.
2129 * they're not FILE blocks). */
2130 ret = consume(a, rar->file.bytes_remaining);
2131 if(ret != ARCHIVE_OK) {
2132 return ret;
2133 }
2134 rar->file.bytes_remaining = 0;
2135 } else {
2136 /* If we're not in merge mode, use safe skipping code.
2137 * This will ensure we'll handle solid archives
2138 * properly. */
2139 ret = rar5_read_data_skip(a);
2140 if(ret != ARCHIVE_OK) {
2141 return ret;
2142 }
2143 }
2144 }
2145
2146 return ARCHIVE_OK;
2147 }
2148
2149 static int scan_for_signature(struct archive_read* a);
2150
2151 /* Base block processing function. A 'base block' is a RARv5 header block
2152 * that tells the reader what kind of data is stored inside the block.
2153 *
 * From a bird's-eye view, a RAR file looks like this:
2155 *
2156 * <magic><base_block_1><base_block_2>...<base_block_n>
2157 *
2158 * There are a few types of base blocks. Those types are specified inside
2159 * the 'switch' statement in this function. For example purposes, I'll write
2160 * how a standard RARv5 file could look like here:
2161 *
2162 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
2163 *
2164 * The structure above could describe an archive file with 3 files in it,
2165 * one service "QuickOpen" block (that is ignored by this parser), and an
2166 * end of file base block marker.
2167 *
2168 * If the file is stored in multiple archive files ("multiarchive"), it might
2169 * look like this:
2170 *
2171 * .part01.rar: <magic><MAIN><FILE><ENDARC>
2172 * .part02.rar: <magic><MAIN><FILE><ENDARC>
2173 * .part03.rar: <magic><MAIN><FILE><ENDARC>
2174 *
2175 * This example could describe 3 RAR files that contain ONE archived file.
2176 * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends on what metadata is stored in
2178 * the headers of <FILE> blocks.
2179 *
2180 * Each <FILE> block contains info about its size, the name of the file it's
 * storing inside, whether this FILE block is a continuation of a block from
 * the previous archive ('split before'), and whether this FILE block should be
 * continued in another archive ('split after'). By parsing the 'split before'
2184 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
2185 * are describing one file, or multiple files (with the same filename, for
2186 * example).
2187 *
2188 * One thing to note is that if we're parsing the first <FILE> block, and
2189 * we see 'split after' flag, then we need to jump over to another <FILE>
2190 * block to be able to decompress rest of the data. To do this, we need
2191 * to skip the <ENDARC> block, then switch to another file, then skip the
2192 * <magic> block, <MAIN> block, and then we're standing on the proper
2193 * <FILE> block.
2194 */
2195
process_base_block(struct archive_read * a,struct archive_entry * entry)2196 static int process_base_block(struct archive_read* a,
2197 struct archive_entry* entry)
2198 {
2199 const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;
2200
2201 struct rar5* rar = get_context(a);
2202 uint32_t hdr_crc, computed_crc;
2203 size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
2204 size_t header_id = 0;
2205 size_t header_flags = 0;
2206 const uint8_t* p;
2207 int ret;
2208
2209 enum HEADER_TYPE {
2210 HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02,
2211 HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
2212 HEAD_UNKNOWN = 0xff,
2213 };
2214
2215 /* Skip any unprocessed data for this file. */
2216 ret = skip_unprocessed_bytes(a);
2217 if(ret != ARCHIVE_OK)
2218 return ret;
2219
2220 /* Read the expected CRC32 checksum. */
2221 if(!read_u32(a, &hdr_crc)) {
2222 return ARCHIVE_EOF;
2223 }
2224
2225 /* Read header size. */
2226 if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
2227 return ARCHIVE_EOF;
2228 }
2229
2230 hdr_size = raw_hdr_size + hdr_size_len;
2231
2232 /* Sanity check, maximum header size for RAR5 is 2MB. */
2233 if(hdr_size > (2 * 1024 * 1024)) {
2234 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2235 "Base block header is too large");
2236
2237 return ARCHIVE_FATAL;
2238 }
2239
2240 /* Additional sanity checks to weed out invalid files. */
2241 if(raw_hdr_size == 0 || hdr_size_len == 0 ||
2242 hdr_size < SMALLEST_RAR5_BLOCK_SIZE)
2243 {
2244 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2245 "Too small block encountered (%zu bytes)",
2246 raw_hdr_size);
2247
2248 return ARCHIVE_FATAL;
2249 }
2250
2251 /* Read the whole header data into memory, maximum memory use here is
2252 * 2MB. */
2253 if(!read_ahead(a, hdr_size, &p)) {
2254 return ARCHIVE_EOF;
2255 }
2256
2257 /* Verify the CRC32 of the header data. */
2258 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
2259 if(computed_crc != hdr_crc) {
2260 #ifndef DONT_FAIL_ON_CRC_ERROR
2261 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2262 "Header CRC error");
2263
2264 return ARCHIVE_FATAL;
2265 #endif
2266 }
2267
2268 /* If the checksum is OK, we proceed with parsing. */
2269 if(ARCHIVE_OK != consume(a, hdr_size_len)) {
2270 return ARCHIVE_EOF;
2271 }
2272
2273 if(!read_var_sized(a, &header_id, NULL))
2274 return ARCHIVE_EOF;
2275
2276 if(!read_var_sized(a, &header_flags, NULL))
2277 return ARCHIVE_EOF;
2278
2279 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0;
2280 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0;
2281 rar->generic.size = (int)hdr_size;
2282 rar->generic.last_header_id = (int)header_id;
2283 rar->main.endarc = 0;
2284
2285 /* Those are possible header ids in RARv5. */
2286 switch(header_id) {
2287 case HEAD_MAIN:
2288 ret = process_head_main(a, rar, entry, header_flags);
2289
2290 /* Main header doesn't have any files in it, so it's
2291 * pointless to return to the caller. Retry to next
2292 * header, which should be HEAD_FILE/HEAD_SERVICE. */
2293 if(ret == ARCHIVE_OK)
2294 return ARCHIVE_RETRY;
2295
2296 return ret;
2297 case HEAD_SERVICE:
2298 ret = process_head_service(a, rar, entry, header_flags);
2299 return ret;
2300 case HEAD_FILE:
2301 ret = process_head_file(a, rar, entry, header_flags);
2302 return ret;
2303 case HEAD_CRYPT:
2304 archive_entry_set_is_metadata_encrypted(entry, 1);
2305 archive_entry_set_is_data_encrypted(entry, 1);
2306 rar->has_encrypted_entries = 1;
2307 rar->headers_are_encrypted = 1;
2308 archive_set_error(&a->archive,
2309 ARCHIVE_ERRNO_FILE_FORMAT,
2310 "Encryption is not supported");
2311 return ARCHIVE_FATAL;
2312 case HEAD_ENDARC:
2313 rar->main.endarc = 1;
2314
2315 /* After encountering an end of file marker, we need
2316 * to take into consideration if this archive is
2317 * continued in another file (i.e. is it part01.rar:
2318 * is there a part02.rar?) */
2319 if(rar->main.volume) {
2320 /* In case there is part02.rar, position the
2321 * read pointer in a proper place, so we can
2322 * resume parsing. */
2323 ret = scan_for_signature(a);
2324 if(ret == ARCHIVE_FATAL) {
2325 return ARCHIVE_EOF;
2326 } else {
2327 if(rar->vol.expected_vol_no ==
2328 UINT_MAX) {
2329 archive_set_error(&a->archive,
2330 ARCHIVE_ERRNO_FILE_FORMAT,
2331 "Header error");
2332 return ARCHIVE_FATAL;
2333 }
2334
2335 rar->vol.expected_vol_no =
2336 rar->main.vol_no + 1;
2337 return ARCHIVE_OK;
2338 }
2339 } else {
2340 return ARCHIVE_EOF;
2341 }
2342 case HEAD_MARK:
2343 return ARCHIVE_EOF;
2344 default:
2345 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) {
2346 archive_set_error(&a->archive,
2347 ARCHIVE_ERRNO_FILE_FORMAT,
2348 "Header type error");
2349 return ARCHIVE_FATAL;
2350 } else {
2351 /* If the block is marked as 'skip if unknown',
2352 * do as the flag says: skip the block
2353 * instead on failing on it. */
2354 return ARCHIVE_RETRY;
2355 }
2356 }
2357
2358 #if !defined WIN32
2359 // Not reached.
2360 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
2361 "Internal unpacker error");
2362 return ARCHIVE_FATAL;
2363 #endif
2364 }
2365
skip_base_block(struct archive_read * a)2366 static int skip_base_block(struct archive_read* a) {
2367 int ret;
2368 struct rar5* rar = get_context(a);
2369
2370 /* Create a new local archive_entry structure that will be operated on
2371 * by header reader; operations on this archive_entry will be discarded.
2372 */
2373 struct archive_entry* entry = archive_entry_new();
2374 ret = process_base_block(a, entry);
2375
2376 /* Discard operations on this archive_entry structure. */
2377 archive_entry_free(entry);
2378 if(ret == ARCHIVE_FATAL)
2379 return ret;
2380
2381 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0)
2382 return ARCHIVE_OK;
2383
2384 if(ret == ARCHIVE_OK)
2385 return ARCHIVE_RETRY;
2386 else
2387 return ret;
2388 }
2389
try_skip_sfx(struct archive_read * a)2390 static int try_skip_sfx(struct archive_read *a)
2391 {
2392 const char *p;
2393
2394 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
2395 return ARCHIVE_EOF;
2396
2397 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0)
2398 {
2399 char signature[sizeof(rar5_signature_xor)];
2400 const void *h;
2401 const char *q;
2402 size_t skip, total = 0;
2403 ssize_t bytes, window = 4096;
2404
2405 rar5_signature(signature);
2406
2407 while (total + window <= (1024 * 512)) {
2408 h = __archive_read_ahead(a, window, &bytes);
2409 if (h == NULL) {
2410 /* Remaining bytes are less than window. */
2411 window >>= 1;
2412 if (window < 0x40)
2413 goto fatal;
2414 continue;
2415 }
2416 if (bytes < 0x40)
2417 goto fatal;
2418 p = h;
2419 q = p + bytes;
2420
2421 /*
2422 * Scan ahead until we find something that looks
2423 * like the RAR header.
2424 */
2425 while (p + 8 < q) {
2426 if (memcmp(p, signature, sizeof(signature)) == 0) {
2427 skip = p - (const char *)h;
2428 __archive_read_consume(a, skip);
2429 return (ARCHIVE_OK);
2430 }
2431 p += 0x10;
2432 }
2433 skip = p - (const char *)h;
2434 __archive_read_consume(a, skip);
2435 total += skip;
2436 }
2437 }
2438
2439 return ARCHIVE_OK;
2440 fatal:
2441 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2442 "Couldn't find out RAR header");
2443 return (ARCHIVE_FATAL);
2444 }
2445
rar5_read_header(struct archive_read * a,struct archive_entry * entry)2446 static int rar5_read_header(struct archive_read *a,
2447 struct archive_entry *entry)
2448 {
2449 struct rar5* rar = get_context(a);
2450 int ret;
2451
2452 /*
2453 * It should be sufficient to call archive_read_next_header() for
2454 * a reader to determine if an entry is encrypted or not.
2455 */
2456 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
2457 rar->has_encrypted_entries = 0;
2458 }
2459
2460 if(rar->header_initialized == 0) {
2461 init_header(a);
2462 if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN)
2463 return ret;
2464 rar->header_initialized = 1;
2465 }
2466
2467 if(rar->skipped_magic == 0) {
2468 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) {
2469 return ARCHIVE_EOF;
2470 }
2471
2472 rar->skipped_magic = 1;
2473 }
2474
2475 do {
2476 ret = process_base_block(a, entry);
2477 } while(ret == ARCHIVE_RETRY ||
2478 (rar->main.endarc > 0 && ret == ARCHIVE_OK));
2479
2480 return ret;
2481 }
2482
init_unpack(struct rar5 * rar)2483 static void init_unpack(struct rar5* rar) {
2484 rar->file.calculated_crc32 = 0;
2485 init_window_mask(rar);
2486
2487 free(rar->cstate.window_buf);
2488 free(rar->cstate.filtered_buf);
2489
2490 if(rar->cstate.window_size > 0) {
2491 rar->cstate.window_buf = calloc(1, rar->cstate.window_size);
2492 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size);
2493 } else {
2494 rar->cstate.window_buf = NULL;
2495 rar->cstate.filtered_buf = NULL;
2496 }
2497
2498 clear_data_ready_stack(rar);
2499
2500 rar->cstate.write_ptr = 0;
2501 rar->cstate.last_write_ptr = 0;
2502
2503 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd));
2504 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld));
2505 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd));
2506 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd));
2507 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd));
2508 }
2509
update_crc(struct rar5 * rar,const uint8_t * p,size_t to_read)2510 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) {
2511 int verify_crc;
2512
2513 if(rar->skip_mode) {
2514 #if defined CHECK_CRC_ON_SOLID_SKIP
2515 verify_crc = 1;
2516 #else
2517 verify_crc = 0;
2518 #endif
2519 } else
2520 verify_crc = 1;
2521
2522 if(verify_crc) {
2523 /* Don't update CRC32 if the file doesn't have the
2524 * `stored_crc32` info filled in. */
2525 if(rar->file.stored_crc32 > 0) {
2526 rar->file.calculated_crc32 =
2527 crc32(rar->file.calculated_crc32, p, (unsigned int)to_read);
2528 }
2529
2530 /* Check if the file uses an optional BLAKE2sp checksum
2531 * algorithm. */
2532 if(rar->file.has_blake2 > 0) {
2533 /* Return value of the `update` function is always 0,
2534 * so we can explicitly ignore it here. */
2535 (void) blake2sp_update(&rar->file.b2state, p, to_read);
2536 }
2537 }
2538 }
2539
create_decode_tables(uint8_t * bit_length,struct decode_table * table,int size)2540 static int create_decode_tables(uint8_t* bit_length,
2541 struct decode_table* table, int size)
2542 {
2543 int code, upper_limit = 0, i, lc[16];
2544 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)];
2545 ssize_t cur_len, quick_data_size;
2546
2547 memset(&lc, 0, sizeof(lc));
2548 memset(table->decode_num, 0, sizeof(table->decode_num));
2549 table->size = size;
2550 table->quick_bits = size == HUFF_NC ? 10 : 7;
2551
2552 for(i = 0; i < size; i++) {
2553 lc[bit_length[i] & 15]++;
2554 }
2555
2556 lc[0] = 0;
2557 table->decode_pos[0] = 0;
2558 table->decode_len[0] = 0;
2559
2560 for(i = 1; i < 16; i++) {
2561 upper_limit += lc[i];
2562
2563 table->decode_len[i] = upper_limit << (16 - i);
2564 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1];
2565
2566 upper_limit <<= 1;
2567 }
2568
2569 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone));
2570
2571 for(i = 0; i < size; i++) {
2572 uint8_t clen = bit_length[i] & 15;
2573 if(clen > 0) {
2574 int last_pos = decode_pos_clone[clen];
2575 table->decode_num[last_pos] = i;
2576 decode_pos_clone[clen]++;
2577 }
2578 }
2579
2580 quick_data_size = (int64_t)1 << table->quick_bits;
2581 cur_len = 1;
2582 for(code = 0; code < quick_data_size; code++) {
2583 int bit_field = code << (16 - table->quick_bits);
2584 int dist, pos;
2585
2586 while(cur_len < rar5_countof(table->decode_len) &&
2587 bit_field >= table->decode_len[cur_len]) {
2588 cur_len++;
2589 }
2590
2591 table->quick_len[code] = (uint8_t) cur_len;
2592
2593 dist = bit_field - table->decode_len[cur_len - 1];
2594 dist >>= (16 - cur_len);
2595
2596 pos = table->decode_pos[cur_len & 15] + dist;
2597 if(cur_len < rar5_countof(table->decode_pos) && pos < size) {
2598 table->quick_num[code] = table->decode_num[pos];
2599 } else {
2600 table->quick_num[code] = 0;
2601 }
2602 }
2603
2604 return ARCHIVE_OK;
2605 }
2606
decode_number(struct archive_read * a,struct decode_table * table,const uint8_t * p,uint16_t * num)2607 static int decode_number(struct archive_read* a, struct decode_table* table,
2608 const uint8_t* p, uint16_t* num)
2609 {
2610 int i, bits, dist, ret;
2611 uint16_t bitfield;
2612 uint32_t pos;
2613 struct rar5* rar = get_context(a);
2614
2615 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) {
2616 return ret;
2617 }
2618
2619 bitfield &= 0xfffe;
2620
2621 if(bitfield < table->decode_len[table->quick_bits]) {
2622 int code = bitfield >> (16 - table->quick_bits);
2623 skip_bits(rar, table->quick_len[code]);
2624 *num = table->quick_num[code];
2625 return ARCHIVE_OK;
2626 }
2627
2628 bits = 15;
2629
2630 for(i = table->quick_bits + 1; i < 15; i++) {
2631 if(bitfield < table->decode_len[i]) {
2632 bits = i;
2633 break;
2634 }
2635 }
2636
2637 skip_bits(rar, bits);
2638
2639 dist = bitfield - table->decode_len[bits - 1];
2640 dist >>= (16 - bits);
2641 pos = table->decode_pos[bits] + dist;
2642
2643 if(pos >= table->size)
2644 pos = 0;
2645
2646 *num = table->decode_num[pos];
2647 return ARCHIVE_OK;
2648 }
2649
2650 /* Reads and parses Huffman tables from the beginning of the block. */
parse_tables(struct archive_read * a,struct rar5 * rar,const uint8_t * p)2651 static int parse_tables(struct archive_read* a, struct rar5* rar,
2652 const uint8_t* p)
2653 {
2654 int ret, value, i, w, idx = 0;
2655 uint8_t bit_length[HUFF_BC],
2656 table[HUFF_TABLE_SIZE],
2657 nibble_mask = 0xF0,
2658 nibble_shift = 4;
2659
2660 enum { ESCAPE = 15 };
2661
2662 /* The data for table generation is compressed using a simple RLE-like
2663 * algorithm when storing zeroes, so we need to unpack it first. */
2664 for(w = 0, i = 0; w < HUFF_BC;) {
2665 if(i >= rar->cstate.cur_block_size) {
2666 /* Truncated data, can't continue. */
2667 archive_set_error(&a->archive,
2668 ARCHIVE_ERRNO_FILE_FORMAT,
2669 "Truncated data in huffman tables");
2670 return ARCHIVE_FATAL;
2671 }
2672
2673 value = (p[i] & nibble_mask) >> nibble_shift;
2674
2675 if(nibble_mask == 0x0F)
2676 ++i;
2677
2678 nibble_mask ^= 0xFF;
2679 nibble_shift ^= 4;
2680
2681 /* Values smaller than 15 is data, so we write it directly.
2682 * Value 15 is a flag telling us that we need to unpack more
2683 * bytes. */
2684 if(value == ESCAPE) {
2685 value = (p[i] & nibble_mask) >> nibble_shift;
2686 if(nibble_mask == 0x0F)
2687 ++i;
2688 nibble_mask ^= 0xFF;
2689 nibble_shift ^= 4;
2690
2691 if(value == 0) {
2692 /* We sometimes need to write the actual value
2693 * of 15, so this case handles that. */
2694 bit_length[w++] = ESCAPE;
2695 } else {
2696 int k;
2697
2698 /* Fill zeroes. */
2699 for(k = 0; (k < value + 2) && (w < HUFF_BC);
2700 k++) {
2701 bit_length[w++] = 0;
2702 }
2703 }
2704 } else {
2705 bit_length[w++] = value;
2706 }
2707 }
2708
2709 rar->bits.in_addr = i;
2710 rar->bits.bit_addr = nibble_shift ^ 4;
2711
2712 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC);
2713 if(ret != ARCHIVE_OK) {
2714 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2715 "Decoding huffman tables failed");
2716 return ARCHIVE_FATAL;
2717 }
2718
2719 for(i = 0; i < HUFF_TABLE_SIZE;) {
2720 uint16_t num;
2721
2722 ret = decode_number(a, &rar->cstate.bd, p, &num);
2723 if(ret != ARCHIVE_OK) {
2724 archive_set_error(&a->archive,
2725 ARCHIVE_ERRNO_FILE_FORMAT,
2726 "Decoding huffman tables failed");
2727 return ARCHIVE_FATAL;
2728 }
2729
2730 if(num < 16) {
2731 /* 0..15: store directly */
2732 table[i] = (uint8_t) num;
2733 i++;
2734 } else if(num < 18) {
2735 /* 16..17: repeat previous code */
2736 uint16_t n;
2737
2738 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
2739 return ret;
2740
2741 if(num == 16) {
2742 n >>= 13;
2743 n += 3;
2744 skip_bits(rar, 3);
2745 } else {
2746 n >>= 9;
2747 n += 11;
2748 skip_bits(rar, 7);
2749 }
2750
2751 if(i > 0) {
2752 while(n-- > 0 && i < HUFF_TABLE_SIZE) {
2753 table[i] = table[i - 1];
2754 i++;
2755 }
2756 } else {
2757 archive_set_error(&a->archive,
2758 ARCHIVE_ERRNO_FILE_FORMAT,
2759 "Unexpected error when decoding "
2760 "huffman tables");
2761 return ARCHIVE_FATAL;
2762 }
2763 } else {
2764 /* other codes: fill with zeroes `n` times */
2765 uint16_t n;
2766
2767 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
2768 return ret;
2769
2770 if(num == 18) {
2771 n >>= 13;
2772 n += 3;
2773 skip_bits(rar, 3);
2774 } else {
2775 n >>= 9;
2776 n += 11;
2777 skip_bits(rar, 7);
2778 }
2779
2780 while(n-- > 0 && i < HUFF_TABLE_SIZE)
2781 table[i++] = 0;
2782 }
2783 }
2784
2785 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC);
2786 if(ret != ARCHIVE_OK) {
2787 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2788 "Failed to create literal table");
2789 return ARCHIVE_FATAL;
2790 }
2791
2792 idx += HUFF_NC;
2793
2794 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC);
2795 if(ret != ARCHIVE_OK) {
2796 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2797 "Failed to create distance table");
2798 return ARCHIVE_FATAL;
2799 }
2800
2801 idx += HUFF_DC;
2802
2803 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC);
2804 if(ret != ARCHIVE_OK) {
2805 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2806 "Failed to create lower bits of distances table");
2807 return ARCHIVE_FATAL;
2808 }
2809
2810 idx += HUFF_LDC;
2811
2812 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC);
2813 if(ret != ARCHIVE_OK) {
2814 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2815 "Failed to create repeating distances table");
2816 return ARCHIVE_FATAL;
2817 }
2818
2819 return ARCHIVE_OK;
2820 }
2821
2822 /* Parses the block header, verifies its CRC byte, and saves the header
2823 * fields inside the `hdr` pointer. */
parse_block_header(struct archive_read * a,const uint8_t * p,ssize_t * block_size,struct compressed_block_header * hdr)2824 static int parse_block_header(struct archive_read* a, const uint8_t* p,
2825 ssize_t* block_size, struct compressed_block_header* hdr)
2826 {
2827 uint8_t calculated_cksum;
2828 memcpy(hdr, p, sizeof(struct compressed_block_header));
2829
2830 if(bf_byte_count(hdr) > 2) {
2831 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2832 "Unsupported block header size (was %d, max is 2)",
2833 bf_byte_count(hdr));
2834 return ARCHIVE_FATAL;
2835 }
2836
2837 /* This should probably use bit reader interface in order to be more
2838 * future-proof. */
2839 *block_size = 0;
2840 switch(bf_byte_count(hdr)) {
2841 /* 1-byte block size */
2842 case 0:
2843 *block_size = *(const uint8_t*) &p[2];
2844 break;
2845
2846 /* 2-byte block size */
2847 case 1:
2848 *block_size = archive_le16dec(&p[2]);
2849 break;
2850
2851 /* 3-byte block size */
2852 case 2:
2853 *block_size = archive_le32dec(&p[2]);
2854 *block_size &= 0x00FFFFFF;
2855 break;
2856
2857 /* Other block sizes are not supported. This case is not
2858 * reached, because we have an 'if' guard before the switch
2859 * that makes sure of it. */
2860 default:
2861 return ARCHIVE_FATAL;
2862 }
2863
2864 /* Verify the block header checksum. 0x5A is a magic value and is
2865 * always * constant. */
2866 calculated_cksum = 0x5A
2867 ^ (uint8_t) hdr->block_flags_u8
2868 ^ (uint8_t) *block_size
2869 ^ (uint8_t) (*block_size >> 8)
2870 ^ (uint8_t) (*block_size >> 16);
2871
2872 if(calculated_cksum != hdr->block_cksum) {
2873 #ifndef DONT_FAIL_ON_CRC_ERROR
2874 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2875 "Block checksum error: got 0x%x, expected 0x%x",
2876 hdr->block_cksum, calculated_cksum);
2877
2878 return ARCHIVE_FATAL;
2879 #endif
2880 }
2881
2882 return ARCHIVE_OK;
2883 }
2884
2885 /* Convenience function used during filter processing. */
parse_filter_data(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * filter_data)2886 static int parse_filter_data(struct archive_read* a, struct rar5* rar,
2887 const uint8_t* p, uint32_t* filter_data)
2888 {
2889 int i, bytes, ret;
2890 uint32_t data = 0;
2891
2892 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes)))
2893 return ret;
2894
2895 bytes++;
2896
2897 for(i = 0; i < bytes; i++) {
2898 uint16_t byte;
2899
2900 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) {
2901 return ret;
2902 }
2903
2904 /* Cast to uint32_t will ensure the shift operation will not
2905 * produce undefined result. */
2906 data += ((uint32_t) byte >> 8) << (i * 8);
2907 skip_bits(rar, 8);
2908 }
2909
2910 *filter_data = data;
2911 return ARCHIVE_OK;
2912 }
2913
2914 /* Function is used during sanity checking. */
is_valid_filter_block_start(struct rar5 * rar,uint32_t start)2915 static int is_valid_filter_block_start(struct rar5* rar,
2916 uint32_t start)
2917 {
2918 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr;
2919 const int64_t last_bs = rar->cstate.last_block_start;
2920 const ssize_t last_bl = rar->cstate.last_block_length;
2921
2922 if(last_bs == 0 || last_bl == 0) {
2923 /* We didn't have any filters yet, so accept this offset. */
2924 return 1;
2925 }
2926
2927 if(block_start >= last_bs + last_bl) {
2928 /* Current offset is bigger than last block's end offset, so
2929 * accept current offset. */
2930 return 1;
2931 }
2932
2933 /* Any other case is not a normal situation and we should fail. */
2934 return 0;
2935 }
2936
2937 /* The function will create a new filter, read its parameters from the input
2938 * stream and add it to the filter collection. */
parse_filter(struct archive_read * ar,const uint8_t * p)2939 static int parse_filter(struct archive_read* ar, const uint8_t* p) {
2940 uint32_t block_start, block_length;
2941 uint16_t filter_type;
2942 struct filter_info* filt = NULL;
2943 struct rar5* rar = get_context(ar);
2944 int ret;
2945
2946 /* Read the parameters from the input stream. */
2947 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start)))
2948 return ret;
2949
2950 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length)))
2951 return ret;
2952
2953 if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type)))
2954 return ret;
2955
2956 filter_type >>= 13;
2957 skip_bits(rar, 3);
2958
2959 /* Perform some sanity checks on this filter parameters. Note that we
2960 * allow only DELTA, E8/E9 and ARM filters here, because rest of
2961 * filters are not used in RARv5. */
2962
2963 if(block_length < 4 ||
2964 block_length > 0x400000 ||
2965 filter_type > FILTER_ARM ||
2966 !is_valid_filter_block_start(rar, block_start))
2967 {
2968 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2969 "Invalid filter encountered");
2970 return ARCHIVE_FATAL;
2971 }
2972
2973 /* Allocate a new filter. */
2974 filt = add_new_filter(rar);
2975 if(filt == NULL) {
2976 archive_set_error(&ar->archive, ENOMEM,
2977 "Can't allocate memory for a filter descriptor.");
2978 return ARCHIVE_FATAL;
2979 }
2980
2981 filt->type = filter_type;
2982 filt->block_start = rar->cstate.write_ptr + block_start;
2983 filt->block_length = block_length;
2984
2985 rar->cstate.last_block_start = filt->block_start;
2986 rar->cstate.last_block_length = filt->block_length;
2987
2988 /* Read some more data in case this is a DELTA filter. Other filter
2989 * types don't require any additional data over what was already
2990 * read. */
2991 if(filter_type == FILTER_DELTA) {
2992 int channels;
2993
2994 if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels)))
2995 return ret;
2996
2997 filt->channels = channels + 1;
2998 }
2999
3000 return ARCHIVE_OK;
3001 }
3002
decode_code_length(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t code)3003 static int decode_code_length(struct archive_read* a, struct rar5* rar,
3004 const uint8_t* p, uint16_t code)
3005 {
3006 int lbits, length = 2;
3007
3008 if(code < 8) {
3009 lbits = 0;
3010 length += code;
3011 } else {
3012 lbits = code / 4 - 1;
3013 length += (4 | (code & 3)) << lbits;
3014 }
3015
3016 if(lbits > 0) {
3017 int add;
3018
3019 if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add))
3020 return -1;
3021
3022 length += add;
3023 }
3024
3025 return length;
3026 }
3027
copy_string(struct archive_read * a,int len,int dist)3028 static int copy_string(struct archive_read* a, int len, int dist) {
3029 struct rar5* rar = get_context(a);
3030 const ssize_t cmask = rar->cstate.window_mask;
3031 const uint64_t write_ptr = rar->cstate.write_ptr +
3032 rar->cstate.solid_offset;
3033 int i;
3034
3035 if (rar->cstate.window_buf == NULL)
3036 return ARCHIVE_FATAL;
3037
3038 /* The unpacker spends most of the time in this function. It would be
3039 * a good idea to introduce some optimizations here.
3040 *
3041 * Just remember that this loop treats buffers that overlap differently
3042 * than buffers that do not overlap. This is why a simple memcpy(3)
3043 * call will not be enough. */
3044
3045 for(i = 0; i < len; i++) {
3046 const ssize_t write_idx = (write_ptr + i) & cmask;
3047 const ssize_t read_idx = (write_ptr + i - dist) & cmask;
3048 rar->cstate.window_buf[write_idx] =
3049 rar->cstate.window_buf[read_idx];
3050 }
3051
3052 rar->cstate.write_ptr += len;
3053 return ARCHIVE_OK;
3054 }
3055
do_uncompress_block(struct archive_read * a,const uint8_t * p)3056 static int do_uncompress_block(struct archive_read* a, const uint8_t* p) {
3057 struct rar5* rar = get_context(a);
3058 uint16_t num;
3059 int ret;
3060
3061 const uint64_t cmask = rar->cstate.window_mask;
3062 const struct compressed_block_header* hdr = &rar->last_block_hdr;
3063 const uint8_t bit_size = 1 + bf_bit_size(hdr);
3064
3065 while(1) {
3066 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr >
3067 (rar->cstate.window_size >> 1)) {
3068 /* Don't allow growing data by more than half of the
3069 * window size at a time. In such case, break the loop;
3070 * next call to this function will continue processing
3071 * from this moment. */
3072 break;
3073 }
3074
3075 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 ||
3076 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 &&
3077 rar->bits.bit_addr >= bit_size))
3078 {
3079 /* If the program counter is here, it means the
3080 * function has finished processing the block. */
3081 rar->cstate.block_parsing_finished = 1;
3082 break;
3083 }
3084
3085 /* Decode the next literal. */
3086 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) {
3087 return ARCHIVE_EOF;
3088 }
3089
3090 /* Num holds a decompression literal, or 'command code'.
3091 *
3092 * - Values lower than 256 are just bytes. Those codes
3093 * can be stored in the output buffer directly.
3094 *
3095 * - Code 256 defines a new filter, which is later used to
3096 * ransform the data block accordingly to the filter type.
3097 * The data block needs to be fully uncompressed first.
3098 *
3099 * - Code bigger than 257 and smaller than 262 define
3100 * a repetition pattern that should be copied from
3101 * an already uncompressed chunk of data.
3102 */
3103
3104 if(num < 256) {
3105 /* Directly store the byte. */
3106 int64_t write_idx = rar->cstate.solid_offset +
3107 rar->cstate.write_ptr++;
3108
3109 rar->cstate.window_buf[write_idx & cmask] =
3110 (uint8_t) num;
3111 continue;
3112 } else if(num >= 262) {
3113 uint16_t dist_slot;
3114 int len = decode_code_length(a, rar, p, num - 262),
3115 dbits,
3116 dist = 1;
3117
3118 if(len == -1) {
3119 archive_set_error(&a->archive,
3120 ARCHIVE_ERRNO_PROGRAMMER,
3121 "Failed to decode the code length");
3122
3123 return ARCHIVE_FATAL;
3124 }
3125
3126 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p,
3127 &dist_slot))
3128 {
3129 archive_set_error(&a->archive,
3130 ARCHIVE_ERRNO_PROGRAMMER,
3131 "Failed to decode the distance slot");
3132
3133 return ARCHIVE_FATAL;
3134 }
3135
3136 if(dist_slot < 4) {
3137 dbits = 0;
3138 dist += dist_slot;
3139 } else {
3140 dbits = dist_slot / 2 - 1;
3141
3142 /* Cast to uint32_t will make sure the shift
3143 * left operation won't produce undefined
3144 * result. Then, the uint32_t type will
3145 * be implicitly casted to int. */
3146 dist += (uint32_t) (2 |
3147 (dist_slot & 1)) << dbits;
3148 }
3149
3150 if(dbits > 0) {
3151 if(dbits >= 4) {
3152 uint32_t add = 0;
3153 uint16_t low_dist;
3154
3155 if(dbits > 4) {
3156 if(ARCHIVE_OK != (ret = read_bits_32(
3157 a, rar, p, &add))) {
3158 /* Return EOF if we
3159 * can't read more
3160 * data. */
3161 return ret;
3162 }
3163
3164 skip_bits(rar, dbits - 4);
3165 add = (add >> (
3166 36 - dbits)) << 4;
3167 dist += add;
3168 }
3169
3170 if(ARCHIVE_OK != decode_number(a,
3171 &rar->cstate.ldd, p, &low_dist))
3172 {
3173 archive_set_error(&a->archive,
3174 ARCHIVE_ERRNO_PROGRAMMER,
3175 "Failed to decode the "
3176 "distance slot");
3177
3178 return ARCHIVE_FATAL;
3179 }
3180
3181 if(dist >= INT_MAX - low_dist - 1) {
3182 /* This only happens in
3183 * invalid archives. */
3184 archive_set_error(&a->archive,
3185 ARCHIVE_ERRNO_FILE_FORMAT,
3186 "Distance pointer "
3187 "overflow");
3188 return ARCHIVE_FATAL;
3189 }
3190
3191 dist += low_dist;
3192 } else {
3193 /* dbits is one of [0,1,2,3] */
3194 int add;
3195
3196 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar,
3197 p, dbits, &add))) {
3198 /* Return EOF if we can't read
3199 * more data. */
3200 return ret;
3201 }
3202
3203 dist += add;
3204 }
3205 }
3206
3207 if(dist > 0x100) {
3208 len++;
3209
3210 if(dist > 0x2000) {
3211 len++;
3212
3213 if(dist > 0x40000) {
3214 len++;
3215 }
3216 }
3217 }
3218
3219 dist_cache_push(rar, dist);
3220 rar->cstate.last_len = len;
3221
3222 if(ARCHIVE_OK != copy_string(a, len, dist))
3223 return ARCHIVE_FATAL;
3224
3225 continue;
3226 } else if(num == 256) {
3227 /* Create a filter. */
3228 ret = parse_filter(a, p);
3229 if(ret != ARCHIVE_OK)
3230 return ret;
3231
3232 continue;
3233 } else if(num == 257) {
3234 if(rar->cstate.last_len != 0) {
3235 if(ARCHIVE_OK != copy_string(a,
3236 rar->cstate.last_len,
3237 rar->cstate.dist_cache[0]))
3238 {
3239 return ARCHIVE_FATAL;
3240 }
3241 }
3242
3243 continue;
3244 } else {
3245 /* num < 262 */
3246 const int idx = num - 258;
3247 const int dist = dist_cache_touch(rar, idx);
3248
3249 uint16_t len_slot;
3250 int len;
3251
3252 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p,
3253 &len_slot)) {
3254 return ARCHIVE_FATAL;
3255 }
3256
3257 len = decode_code_length(a, rar, p, len_slot);
3258 if (len == -1) {
3259 return ARCHIVE_FATAL;
3260 }
3261
3262 rar->cstate.last_len = len;
3263
3264 if(ARCHIVE_OK != copy_string(a, len, dist))
3265 return ARCHIVE_FATAL;
3266
3267 continue;
3268 }
3269 }
3270
3271 return ARCHIVE_OK;
3272 }
3273
3274 /* Binary search for the RARv5 signature. */
scan_for_signature(struct archive_read * a)3275 static int scan_for_signature(struct archive_read* a) {
3276 const uint8_t* p;
3277 const int chunk_size = 512;
3278 ssize_t i;
3279 char signature[sizeof(rar5_signature_xor)];
3280
3281 /* If we're here, it means we're on an 'unknown territory' data.
3282 * There's no indication what kind of data we're reading here.
3283 * It could be some text comment, any kind of binary data,
3284 * digital sign, dragons, etc.
3285 *
3286 * We want to find a valid RARv5 magic header inside this unknown
3287 * data. */
3288
3289 /* Is it possible in libarchive to just skip everything until the
3290 * end of the file? If so, it would be a better approach than the
3291 * current implementation of this function. */
3292
3293 rar5_signature(signature);
3294
3295 while(1) {
3296 if(!read_ahead(a, chunk_size, &p))
3297 return ARCHIVE_EOF;
3298
3299 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor);
3300 i++) {
3301 if(memcmp(&p[i], signature,
3302 sizeof(rar5_signature_xor)) == 0) {
3303 /* Consume the number of bytes we've used to
3304 * search for the signature, as well as the
3305 * number of bytes used by the signature
3306 * itself. After this we should be standing
3307 * on a valid base block header. */
3308 (void) consume(a,
3309 i + sizeof(rar5_signature_xor));
3310 return ARCHIVE_OK;
3311 }
3312 }
3313
3314 consume(a, chunk_size);
3315 }
3316
3317 return ARCHIVE_FATAL;
3318 }
3319
3320 /* This function will switch the multivolume archive file to another file,
3321 * i.e. from part03 to part 04. */
advance_multivolume(struct archive_read * a)3322 static int advance_multivolume(struct archive_read* a) {
3323 int lret;
3324 struct rar5* rar = get_context(a);
3325
3326 /* A small state machine that will skip unnecessary data, needed to
3327 * switch from one multivolume to another. Such skipping is needed if
3328 * we want to be an stream-oriented (instead of file-oriented)
3329 * unpacker.
3330 *
3331 * The state machine starts with `rar->main.endarc` == 0. It also
3332 * assumes that current stream pointer points to some base block
3333 * header.
3334 *
3335 * The `endarc` field is being set when the base block parsing
3336 * function encounters the 'end of archive' marker.
3337 */
3338
3339 while(1) {
3340 if(rar->main.endarc == 1) {
3341 int looping = 1;
3342
3343 rar->main.endarc = 0;
3344
3345 while(looping) {
3346 lret = skip_base_block(a);
3347 switch(lret) {
3348 case ARCHIVE_RETRY:
3349 /* Continue looping. */
3350 break;
3351 case ARCHIVE_OK:
3352 /* Break loop. */
3353 looping = 0;
3354 break;
3355 default:
3356 /* Forward any errors to the
3357 * caller. */
3358 return lret;
3359 }
3360 }
3361
3362 break;
3363 } else {
3364 /* Skip current base block. In order to properly skip
3365 * it, we really need to simply parse it and discard
3366 * the results. */
3367
3368 lret = skip_base_block(a);
3369 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED)
3370 return lret;
3371
3372 /* The `skip_base_block` function tells us if we
3373 * should continue with skipping, or we should stop
3374 * skipping. We're trying to skip everything up to
3375 * a base FILE block. */
3376
3377 if(lret != ARCHIVE_RETRY) {
3378 /* If there was an error during skipping, or we
3379 * have just skipped a FILE base block... */
3380
3381 if(rar->main.endarc == 0) {
3382 return lret;
3383 } else {
3384 continue;
3385 }
3386 }
3387 }
3388 }
3389
3390 return ARCHIVE_OK;
3391 }
3392
3393 /* Merges the partial block from the first multivolume archive file, and
3394 * partial block from the second multivolume archive file. The result is
3395 * a chunk of memory containing the whole block, and the stream pointer
3396 * is advanced to the next block in the second multivolume archive file. */
merge_block(struct archive_read * a,ssize_t block_size,const uint8_t ** p)3397 static int merge_block(struct archive_read* a, ssize_t block_size,
3398 const uint8_t** p)
3399 {
3400 struct rar5* rar = get_context(a);
3401 ssize_t cur_block_size, partial_offset = 0;
3402 const uint8_t* lp;
3403 int ret;
3404
3405 if(rar->merge_mode) {
3406 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3407 "Recursive merge is not allowed");
3408
3409 return ARCHIVE_FATAL;
3410 }
3411
3412 /* Set a flag that we're in the switching mode. */
3413 rar->cstate.switch_multivolume = 1;
3414
3415 /* Reallocate the memory which will hold the whole block. */
3416 if(rar->vol.push_buf)
3417 free((void*) rar->vol.push_buf);
3418
3419 /* Increasing the allocation block by 8 is due to bit reading functions,
3420 * which are using additional 2 or 4 bytes. Allocating the block size
3421 * by exact value would make bit reader perform reads from invalid
3422 * memory block when reading the last byte from the buffer. */
3423 rar->vol.push_buf = malloc(block_size + 8);
3424 if(!rar->vol.push_buf) {
3425 archive_set_error(&a->archive, ENOMEM,
3426 "Can't allocate memory for a merge block buffer.");
3427 return ARCHIVE_FATAL;
3428 }
3429
3430 /* Valgrind complains if the extension block for bit reader is not
3431 * initialized, so initialize it. */
3432 memset(&rar->vol.push_buf[block_size], 0, 8);
3433
3434 /* A single block can span across multiple multivolume archive files,
3435 * so we use a loop here. This loop will consume enough multivolume
3436 * archive files until the whole block is read. */
3437
3438 while(1) {
3439 /* Get the size of current block chunk in this multivolume
3440 * archive file and read it. */
3441 cur_block_size = rar5_min(rar->file.bytes_remaining,
3442 block_size - partial_offset);
3443
3444 if(cur_block_size == 0) {
3445 archive_set_error(&a->archive,
3446 ARCHIVE_ERRNO_FILE_FORMAT,
3447 "Encountered block size == 0 during block merge");
3448 return ARCHIVE_FATAL;
3449 }
3450
3451 if(!read_ahead(a, cur_block_size, &lp))
3452 return ARCHIVE_EOF;
3453
3454 /* Sanity check; there should never be a situation where this
3455 * function reads more data than the block's size. */
3456 if(partial_offset + cur_block_size > block_size) {
3457 archive_set_error(&a->archive,
3458 ARCHIVE_ERRNO_PROGRAMMER,
3459 "Consumed too much data when merging blocks.");
3460 return ARCHIVE_FATAL;
3461 }
3462
3463 /* Merge previous block chunk with current block chunk,
3464 * or create first block chunk if this is our first
3465 * iteration. */
3466 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size);
3467
3468 /* Advance the stream read pointer by this block chunk size. */
3469 if(ARCHIVE_OK != consume(a, cur_block_size))
3470 return ARCHIVE_EOF;
3471
3472 /* Update the pointers. `partial_offset` contains information
3473 * about the sum of merged block chunks. */
3474 partial_offset += cur_block_size;
3475 rar->file.bytes_remaining -= cur_block_size;
3476
3477 /* If `partial_offset` is the same as `block_size`, this means
3478 * we've merged all block chunks and we have a valid full
3479 * block. */
3480 if(partial_offset == block_size) {
3481 break;
3482 }
3483
3484 /* If we don't have any bytes to read, this means we should
3485 * switch to another multivolume archive file. */
3486 if(rar->file.bytes_remaining == 0) {
3487 rar->merge_mode++;
3488 ret = advance_multivolume(a);
3489 rar->merge_mode--;
3490 if(ret != ARCHIVE_OK) {
3491 return ret;
3492 }
3493 }
3494 }
3495
3496 *p = rar->vol.push_buf;
3497
3498 /* If we're here, we can resume unpacking by processing the block
3499 * pointed to by the `*p` memory pointer. */
3500
3501 return ARCHIVE_OK;
3502 }
3503
/* Loads the next compressed block (or resumes the block in progress) and
 * runs do_uncompress_block() on it.
 *
 * When the previous block has been fully parsed, this function parses and
 * consumes a new block header, makes the block's data available in memory
 * (directly via read-ahead, or through merge_block() when the block is
 * larger than the bytes remaining in this file), resets the bit reader
 * position and, if the header says so, loads the Huffman tables. Otherwise
 * the previously stored block buffer is reused.
 *
 * Returns ARCHIVE_OK on success. Failures of the helpers are forwarded;
 * failed read-ahead/header consume yields ARCHIVE_EOF, and a failed
 * consume of a finished block yields ARCHIVE_FATAL. */
static int process_block(struct archive_read* a) {
	const uint8_t* p;
	struct rar5* rar = get_context(a);
	int ret;

	/* If we don't have any data to be processed, this most probably means
	 * we need to switch to the next volume. */
	if(rar->main.volume && rar->file.bytes_remaining == 0) {
		ret = advance_multivolume(a);
		if(ret != ARCHIVE_OK)
			return ret;
	}

	if(rar->cstate.block_parsing_finished) {
		ssize_t block_size;
		ssize_t to_skip;
		ssize_t cur_block_size;

		/* The header size won't be bigger than 6 bytes. */
		if(!read_ahead(a, 6, &p)) {
			/* Failed to prefetch data block header. */
			return ARCHIVE_EOF;
		}

		/*
		 * Read block_size by parsing block header. Validate the header
		 * by calculating CRC byte stored inside the header. Size of
		 * the header is not constant (block size can be stored either
		 * in 1 or 2 bytes), that's why block size is left out from the
		 * `compressed_block_header` structure and returned by
		 * `parse_block_header` as the second argument. */

		ret = parse_block_header(a, p, &block_size,
		    &rar->last_block_hdr);
		if(ret != ARCHIVE_OK) {
			return ret;
		}

		/* Skip block header. Next data is huffman tables,
		 * if present. */
		to_skip = sizeof(struct compressed_block_header) +
			bf_byte_count(&rar->last_block_hdr) + 1;

		if(ARCHIVE_OK != consume(a, to_skip))
			return ARCHIVE_EOF;

		/* The header bytes count against this file's remaining
		 * data as well. */
		rar->file.bytes_remaining -= to_skip;

		/* The block size gives information about the whole block size,
		 * but the block could be stored in split form when using
		 * multi-volume archives. In this case, the block size will be
		 * bigger than the actual data stored in this file. Remaining
		 * part of the data will be in another file. */

		cur_block_size =
			rar5_min(rar->file.bytes_remaining, block_size);

		if(block_size > rar->file.bytes_remaining) {
			/* If current blocks' size is bigger than our data
			 * size, this means we have a multivolume archive.
			 * In this case, skip all base headers until the end
			 * of the file, proceed to next "partXXX.rar" volume,
			 * find its signature, skip all headers up to the first
			 * FILE base header, and continue from there.
			 *
			 * Note that `merge_block` will update the `rar`
			 * context structure quite extensively. */

			ret = merge_block(a, block_size, &p);
			if(ret != ARCHIVE_OK) {
				return ret;
			}

			/* The merged buffer holds the complete block. */
			cur_block_size = block_size;

			/* Current stream pointer should be now directly
			 * *after* the block that spanned through multiple
			 * archive files. `p` pointer should have the data of
			 * the *whole* block (merged from partial blocks
			 * stored in multiple archives files). */
		} else {
			rar->cstate.switch_multivolume = 0;

			/* Read the whole block size into memory. This can take
			 * up to 8 megabytes of memory in theoretical cases.
			 * Might be worth to optimize this and use a standard
			 * chunk of 4kb's. */
			if(!read_ahead(a, 4 + cur_block_size, &p)) {
				/* Failed to prefetch block data. */
				return ARCHIVE_EOF;
			}
		}

		/* Remember the block so that a later call can resume
		 * decoding it, and restart the bit reader at its
		 * beginning. */
		rar->cstate.block_buf = p;
		rar->cstate.cur_block_size = cur_block_size;
		rar->cstate.block_parsing_finished = 0;

		rar->bits.in_addr = 0;
		rar->bits.bit_addr = 0;

		if(bf_is_table_present(&rar->last_block_hdr)) {
			/* Load Huffman tables. */
			ret = parse_tables(a, rar, p);
			if(ret != ARCHIVE_OK) {
				/* Error during decompression of Huffman
				 * tables. */
				return ret;
			}
		}
	} else {
		/* Block parsing not finished, reuse previous memory buffer. */
		p = rar->cstate.block_buf;
	}

	/* Uncompress the block, or a part of it, depending on how many bytes
	 * will be generated by uncompressing the block.
	 *
	 * In case too many bytes will be generated, calling this function
	 * again will resume the uncompression operation. */
	ret = do_uncompress_block(a, p);
	if(ret != ARCHIVE_OK) {
		return ret;
	}

	if(rar->cstate.block_parsing_finished &&
	    rar->cstate.switch_multivolume == 0 &&
	    rar->cstate.cur_block_size > 0)
	{
		/* If we're processing a normal block, consume the whole
		 * block. We can do this because we've already read the whole
		 * block to memory. */
		if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size))
			return ARCHIVE_FATAL;

		rar->file.bytes_remaining -= rar->cstate.cur_block_size;
	} else if(rar->cstate.switch_multivolume) {
		/* Don't consume the block if we're doing multivolume
		 * processing. The volume switching function will consume
		 * the proper count of bytes instead. */
		rar->cstate.switch_multivolume = 0;
	}

	return ARCHIVE_OK;
}
3648
3649 /* Pops the `buf`, `size` and `offset` from the "data ready" stack.
3650 *
3651 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY
3652 * when there is no data on the stack. */
use_data(struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3653 static int use_data(struct rar5* rar, const void** buf, size_t* size,
3654 int64_t* offset)
3655 {
3656 int i;
3657
3658 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3659 struct data_ready *d = &rar->cstate.dready[i];
3660
3661 if(d->used) {
3662 if(buf) *buf = d->buf;
3663 if(size) *size = d->size;
3664 if(offset) *offset = d->offset;
3665
3666 d->used = 0;
3667 return ARCHIVE_OK;
3668 }
3669 }
3670
3671 return ARCHIVE_RETRY;
3672 }
3673
clear_data_ready_stack(struct rar5 * rar)3674 static void clear_data_ready_stack(struct rar5* rar) {
3675 memset(&rar->cstate.dready, 0, sizeof(rar->cstate.dready));
3676 }
3677
3678 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready
3679 * FIFO stack. Those values will be popped from this stack by the `use_data`
3680 * function. */
push_data_ready(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,size_t size,int64_t offset)3681 static int push_data_ready(struct archive_read* a, struct rar5* rar,
3682 const uint8_t* buf, size_t size, int64_t offset)
3683 {
3684 int i;
3685
3686 /* Don't push if we're in skip mode. This is needed because solid
3687 * streams need full processing even if we're skipping data. After
3688 * fully processing the stream, we need to discard the generated bytes,
3689 * because we're interested only in the side effect: building up the
3690 * internal window circular buffer. This window buffer will be used
3691 * later during unpacking of requested data. */
3692 if(rar->skip_mode)
3693 return ARCHIVE_OK;
3694
3695 /* Sanity check. */
3696 if(offset != rar->file.last_offset + rar->file.last_size) {
3697 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3698 "Sanity check error: output stream is not continuous");
3699 return ARCHIVE_FATAL;
3700 }
3701
3702 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3703 struct data_ready* d = &rar->cstate.dready[i];
3704 if(!d->used) {
3705 d->used = 1;
3706 d->buf = buf;
3707 d->size = size;
3708 d->offset = offset;
3709
3710 /* These fields are used only in sanity checking. */
3711 rar->file.last_offset = offset;
3712 rar->file.last_size = size;
3713
3714 /* Calculate the checksum of this new block before
3715 * submitting data to libarchive's engine. */
3716 update_crc(rar, d->buf, d->size);
3717
3718 return ARCHIVE_OK;
3719 }
3720 }
3721
3722 /* Program counter will reach this code if the `rar->cstate.data_ready`
3723 * stack will be filled up so that no new entries will be allowed. The
3724 * code shouldn't allow such situation to occur. So we treat this case
3725 * as an internal error. */
3726
3727 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3728 "Error: premature end of data_ready stack");
3729 return ARCHIVE_FATAL;
3730 }
3731
3732 /* This function uncompresses the data that is stored in the <FILE> base
3733 * block.
3734 *
3735 * The FILE base block looks like this:
3736 *
3737 * <header><huffman tables><block_1><block_2>...<block_n>
3738 *
3739 * The <header> is a block header, that is parsed in parse_block_header().
3740 * It's a "compressed_block_header" structure, containing metadata needed
3741 * to know when we should stop looking for more <block_n> blocks.
3742 *
3743 * <huffman tables> contain data needed to set up the huffman tables, needed
3744 * for the actual decompression.
3745 *
3746 * Each <block_n> consists of series of literals:
3747 *
3748 * <literal><literal><literal>...<literal>
3749 *
3750 * Those literals generate the uncompression data. They operate on a circular
3751 * buffer, sometimes writing raw data into it, sometimes referencing
3752 * some previous data inside this buffer, and sometimes declaring a filter
3753 * that will need to be executed on the data stored in the circular buffer.
3754 * It all depends on the literal that is used.
3755 *
3756 * Sometimes blocks produce output data, sometimes they don't. For example, for
3757 * some huge files that use lots of filters, sometimes a block is filled with
3758 * only filter declaration literals. Such blocks won't produce any data in the
3759 * circular buffer.
3760 *
3761 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte,
3762 * because a literal can reference previously decompressed data. For example,
3763 * there can be a literal that says: 'append a byte 0xFE here', and after
3764 * it another literal can say 'append 1 megabyte of data from circular buffer
3765 * offset 0x12345'. This is how RAR format handles compressing repeated
3766 * patterns.
3767 *
3768 * The RAR compressor creates those literals and the actual efficiency of
3769 * compression depends on what those literals are. The literals can also
3770 * be seen as a kind of a non-turing-complete virtual machine that simply
3771 * tells the decompressor what it should do.
3772 * */
3773
do_uncompress_file(struct archive_read * a)3774 static int do_uncompress_file(struct archive_read* a) {
3775 struct rar5* rar = get_context(a);
3776 int ret;
3777 int64_t max_end_pos;
3778
3779 if(!rar->cstate.initialized) {
3780 /* Don't perform full context reinitialization if we're
3781 * processing a solid archive. */
3782 if(!rar->main.solid || !rar->cstate.window_buf) {
3783 init_unpack(rar);
3784 }
3785
3786 rar->cstate.initialized = 1;
3787 }
3788
3789 /* Don't allow extraction if window_size is invalid. */
3790 if(rar->cstate.window_size == 0) {
3791 archive_set_error(&a->archive,
3792 ARCHIVE_ERRNO_FILE_FORMAT,
3793 "Invalid window size declaration in this file");
3794
3795 /* This should never happen in valid files. */
3796 return ARCHIVE_FATAL;
3797 }
3798
3799 if(rar->cstate.all_filters_applied == 1) {
3800 /* We use while(1) here, but standard case allows for just 1
3801 * iteration. The loop will iterate if process_block() didn't
3802 * generate any data at all. This can happen if the block
3803 * contains only filter definitions (this is common in big
3804 * files). */
3805 while(1) {
3806 ret = process_block(a);
3807 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL)
3808 return ret;
3809
3810 if(rar->cstate.last_write_ptr ==
3811 rar->cstate.write_ptr) {
3812 /* The block didn't generate any new data,
3813 * so just process a new block if this one
3814 * wasn't the last block in the file. */
3815 if (bf_is_last_block(&rar->last_block_hdr)) {
3816 return ARCHIVE_EOF;
3817 }
3818
3819 continue;
3820 }
3821
3822 /* The block has generated some new data, so break
3823 * the loop. */
3824 break;
3825 }
3826 }
3827
3828 /* Try to run filters. If filters won't be applied, it means that
3829 * insufficient data was generated. */
3830 ret = apply_filters(a);
3831 if(ret == ARCHIVE_RETRY) {
3832 return ARCHIVE_OK;
3833 } else if(ret == ARCHIVE_FATAL) {
3834 return ARCHIVE_FATAL;
3835 }
3836
3837 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */
3838
3839 if(cdeque_size(&rar->cstate.filters) > 0) {
3840 /* Check if we can write something before hitting first
3841 * filter. */
3842 struct filter_info* flt;
3843
3844 /* Get the block_start offset from the first filter. */
3845 if(CDE_OK != cdeque_front(&rar->cstate.filters,
3846 cdeque_filter_p(&flt)))
3847 {
3848 archive_set_error(&a->archive,
3849 ARCHIVE_ERRNO_PROGRAMMER,
3850 "Can't read first filter");
3851 return ARCHIVE_FATAL;
3852 }
3853
3854 max_end_pos = rar5_min(flt->block_start,
3855 rar->cstate.write_ptr);
3856 } else {
3857 /* There are no filters defined, or all filters were applied.
3858 * This means we can just store the data without any
3859 * postprocessing. */
3860 max_end_pos = rar->cstate.write_ptr;
3861 }
3862
3863 if(max_end_pos == rar->cstate.last_write_ptr) {
3864 /* We can't write anything yet. The block uncompression
3865 * function did not generate enough data, and no filter can be
3866 * applied. At the same time we don't have any data that can be
3867 * stored without filter postprocessing. This means we need to
3868 * wait for more data to be generated, so we can apply the
3869 * filters.
3870 *
3871 * Signal the caller that we need more data to be able to do
3872 * anything.
3873 */
3874 return ARCHIVE_RETRY;
3875 } else {
3876 /* We can write the data before hitting the first filter.
3877 * So let's do it. The push_window_data() function will
3878 * effectively return the selected data block to the user
3879 * application. */
3880 push_window_data(a, rar, rar->cstate.last_write_ptr,
3881 max_end_pos);
3882 rar->cstate.last_write_ptr = max_end_pos;
3883 }
3884
3885 return ARCHIVE_OK;
3886 }
3887
uncompress_file(struct archive_read * a)3888 static int uncompress_file(struct archive_read* a) {
3889 int ret;
3890
3891 while(1) {
3892 /* Sometimes the uncompression function will return a
3893 * 'retry' signal. If this will happen, we have to retry
3894 * the function. */
3895 ret = do_uncompress_file(a);
3896 if(ret != ARCHIVE_RETRY)
3897 return ret;
3898 }
3899 }
3900
3901
do_unstore_file(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3902 static int do_unstore_file(struct archive_read* a,
3903 struct rar5* rar, const void** buf, size_t* size, int64_t* offset)
3904 {
3905 size_t to_read;
3906 const uint8_t* p;
3907
3908 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 &&
3909 rar->generic.split_after > 0)
3910 {
3911 int ret;
3912
3913 rar->cstate.switch_multivolume = 1;
3914 ret = advance_multivolume(a);
3915 rar->cstate.switch_multivolume = 0;
3916
3917 if(ret != ARCHIVE_OK) {
3918 /* Failed to advance to next multivolume archive
3919 * file. */
3920 return ret;
3921 }
3922 }
3923
3924 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024);
3925 if(to_read == 0) {
3926 return ARCHIVE_EOF;
3927 }
3928
3929 if(!read_ahead(a, to_read, &p)) {
3930 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3931 "I/O error when unstoring file");
3932 return ARCHIVE_FATAL;
3933 }
3934
3935 if(ARCHIVE_OK != consume(a, to_read)) {
3936 return ARCHIVE_EOF;
3937 }
3938
3939 if(buf) *buf = p;
3940 if(size) *size = to_read;
3941 if(offset) *offset = rar->cstate.last_unstore_ptr;
3942
3943 rar->file.bytes_remaining -= to_read;
3944 rar->cstate.last_unstore_ptr += to_read;
3945
3946 update_crc(rar, p, to_read);
3947 return ARCHIVE_OK;
3948 }
3949
do_unpack(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3950 static int do_unpack(struct archive_read* a, struct rar5* rar,
3951 const void** buf, size_t* size, int64_t* offset)
3952 {
3953 enum COMPRESSION_METHOD {
3954 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4,
3955 BEST = 5
3956 };
3957
3958 if(rar->file.service > 0) {
3959 return do_unstore_file(a, rar, buf, size, offset);
3960 } else {
3961 switch(rar->cstate.method) {
3962 case STORE:
3963 return do_unstore_file(a, rar, buf, size,
3964 offset);
3965 case FASTEST:
3966 /* fallthrough */
3967 case FAST:
3968 /* fallthrough */
3969 case NORMAL:
3970 /* fallthrough */
3971 case GOOD:
3972 /* fallthrough */
3973 case BEST:
3974 /* No data is returned here. But because a sparse-file aware
3975 * caller (like archive_read_data_into_fd) may treat zero-size
3976 * as a sparse file block, we need to update the offset
3977 * accordingly. At this point the decoder doesn't have any
3978 * pending uncompressed data blocks, so the current position in
3979 * the output file should be last_write_ptr. */
3980 if (offset) *offset = rar->cstate.last_write_ptr;
3981 return uncompress_file(a);
3982 default:
3983 archive_set_error(&a->archive,
3984 ARCHIVE_ERRNO_FILE_FORMAT,
3985 "Compression method not supported: 0x%x",
3986 rar->cstate.method);
3987
3988 return ARCHIVE_FATAL;
3989 }
3990 }
3991
3992 #if !defined WIN32
3993 /* Not reached. */
3994 return ARCHIVE_OK;
3995 #endif
3996 }
3997
verify_checksums(struct archive_read * a)3998 static int verify_checksums(struct archive_read* a) {
3999 int verify_crc;
4000 struct rar5* rar = get_context(a);
4001
4002 /* Check checksums only when actually unpacking the data. There's no
4003 * need to calculate checksum when we're skipping data in solid archives
4004 * (skipping in solid archives is the same thing as unpacking compressed
4005 * data and discarding the result). */
4006
4007 if(!rar->skip_mode) {
4008 /* Always check checksums if we're not in skip mode */
4009 verify_crc = 1;
4010 } else {
4011 /* We can override the logic above with a compile-time option
4012 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging,
4013 * and it will check checksums of unpacked data even when
4014 * we're skipping it. */
4015
4016 #if defined CHECK_CRC_ON_SOLID_SKIP
4017 /* Debug case */
4018 verify_crc = 1;
4019 #else
4020 /* Normal case */
4021 verify_crc = 0;
4022 #endif
4023 }
4024
4025 if(verify_crc) {
4026 /* During unpacking, on each unpacked block we're calling the
4027 * update_crc() function. Since we are here, the unpacking
4028 * process is already over and we can check if calculated
4029 * checksum (CRC32 or BLAKE2sp) is the same as what is stored
4030 * in the archive. */
4031 if(rar->file.stored_crc32 > 0) {
4032 /* Check CRC32 only when the file contains a CRC32
4033 * value for this file. */
4034
4035 if(rar->file.calculated_crc32 !=
4036 rar->file.stored_crc32) {
4037 /* Checksums do not match; the unpacked file
4038 * is corrupted. */
4039
4040 DEBUG_CODE {
4041 printf("Checksum error: CRC32 "
4042 "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n",
4043 rar->file.calculated_crc32,
4044 rar->file.stored_crc32);
4045 }
4046
4047 #ifndef DONT_FAIL_ON_CRC_ERROR
4048 archive_set_error(&a->archive,
4049 ARCHIVE_ERRNO_FILE_FORMAT,
4050 "Checksum error: CRC32");
4051 return ARCHIVE_FATAL;
4052 #endif
4053 } else {
4054 DEBUG_CODE {
4055 printf("Checksum OK: CRC32 "
4056 "(%08" PRIx32 "/%08" PRIx32 ")\n",
4057 rar->file.stored_crc32,
4058 rar->file.calculated_crc32);
4059 }
4060 }
4061 }
4062
4063 if(rar->file.has_blake2 > 0) {
4064 /* BLAKE2sp is an optional checksum algorithm that is
4065 * added to RARv5 archives when using the `-htb` switch
4066 * during creation of archive.
4067 *
4068 * We now finalize the hash calculation by calling the
4069 * `final` function. This will generate the final hash
4070 * value we can use to compare it with the BLAKE2sp
4071 * checksum that is stored in the archive.
4072 *
4073 * The return value of this `final` function is not
4074 * very helpful, as it guards only against improper use.
4075 * This is why we're explicitly ignoring it. */
4076
4077 uint8_t b2_buf[32];
4078 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32);
4079
4080 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) {
4081 #ifndef DONT_FAIL_ON_CRC_ERROR
4082 archive_set_error(&a->archive,
4083 ARCHIVE_ERRNO_FILE_FORMAT,
4084 "Checksum error: BLAKE2");
4085
4086 return ARCHIVE_FATAL;
4087 #endif
4088 }
4089 }
4090 }
4091
4092 /* Finalization for this file has been successfully completed. */
4093 return ARCHIVE_OK;
4094 }
4095
/* Verifies the checksums of the current file by delegating to
 * verify_checksums(), and returns its status unchanged. */
static int verify_global_checksums(struct archive_read* a) {
	const int status = verify_checksums(a);

	return status;
}
4099
4100 /*
4101 * Decryption function for the magic signature pattern. Check the comment near
4102 * the `rar5_signature_xor` symbol to read the rationale behind this.
4103 */
rar5_signature(char * buf)4104 static void rar5_signature(char *buf) {
4105 size_t i;
4106
4107 for(i = 0; i < sizeof(rar5_signature_xor); i++) {
4108 buf[i] = rar5_signature_xor[i] ^ 0xA1;
4109 }
4110 }
4111
rar5_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)4112 static int rar5_read_data(struct archive_read *a, const void **buff,
4113 size_t *size, int64_t *offset) {
4114 int ret;
4115 struct rar5* rar = get_context(a);
4116
4117 if (size)
4118 *size = 0;
4119
4120 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
4121 rar->has_encrypted_entries = 0;
4122 }
4123
4124 if (rar->headers_are_encrypted || rar->cstate.data_encrypted) {
4125 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
4126 "Reading encrypted data is not currently supported");
4127 return ARCHIVE_FATAL;
4128 }
4129
4130 if(rar->file.dir > 0) {
4131 /* Don't process any data if this file entry was declared
4132 * as a directory. This is needed, because entries marked as
4133 * directory doesn't have any dictionary buffer allocated, so
4134 * it's impossible to perform any decompression. */
4135 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
4136 "Can't decompress an entry marked as a directory");
4137 return ARCHIVE_FAILED;
4138 }
4139
4140 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) {
4141 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
4142 "Unpacker has written too many bytes");
4143 return ARCHIVE_FATAL;
4144 }
4145
4146 ret = use_data(rar, buff, size, offset);
4147 if(ret == ARCHIVE_OK) {
4148 return ret;
4149 }
4150
4151 if(rar->file.eof == 1) {
4152 return ARCHIVE_EOF;
4153 }
4154
4155 ret = do_unpack(a, rar, buff, size, offset);
4156 if(ret != ARCHIVE_OK) {
4157 return ret;
4158 }
4159
4160 if(rar->file.bytes_remaining == 0 &&
4161 rar->cstate.last_write_ptr == rar->file.unpacked_size)
4162 {
4163 /* If all bytes of current file were processed, run
4164 * finalization.
4165 *
4166 * Finalization will check checksum against proper values. If
4167 * some of the checksums will not match, we'll return an error
4168 * value in the last `archive_read_data` call to signal an error
4169 * to the user. */
4170
4171 rar->file.eof = 1;
4172 return verify_global_checksums(a);
4173 }
4174
4175 return ARCHIVE_OK;
4176 }
4177
rar5_read_data_skip(struct archive_read * a)4178 static int rar5_read_data_skip(struct archive_read *a) {
4179 struct rar5* rar = get_context(a);
4180
4181 if(rar->main.solid && (rar->cstate.data_encrypted == 0)) {
4182 /* In solid archives, instead of skipping the data, we need to
4183 * extract it, and dispose the result. The side effect of this
4184 * operation will be setting up the initial window buffer state
4185 * needed to be able to extract the selected file. Note that
4186 * this is only possible when data withing this solid block is
4187 * not encrypted, in which case we'll skip and fail if the user
4188 * tries to read data. */
4189
4190 int ret;
4191
4192 /* Make sure to process all blocks in the compressed stream. */
4193 while(rar->file.bytes_remaining > 0) {
4194 /* Setting the "skip mode" will allow us to skip
4195 * checksum checks during data skipping. Checking the
4196 * checksum of skipped data isn't really necessary and
4197 * it's only slowing things down.
4198 *
4199 * This is incremented instead of setting to 1 because
4200 * this data skipping function can be called
4201 * recursively. */
4202 rar->skip_mode++;
4203
4204 /* We're disposing 1 block of data, so we use triple
4205 * NULLs in arguments. */
4206 ret = rar5_read_data(a, NULL, NULL, NULL);
4207
4208 /* Turn off "skip mode". */
4209 rar->skip_mode--;
4210
4211 if(ret < 0 || ret == ARCHIVE_EOF) {
4212 /* Propagate any potential error conditions
4213 * to the caller. */
4214 return ret;
4215 }
4216 }
4217 } else {
4218 /* In standard archives, we can just jump over the compressed
4219 * stream. Each file in non-solid archives starts from an empty
4220 * window buffer. */
4221
4222 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) {
4223 return ARCHIVE_FATAL;
4224 }
4225
4226 rar->file.bytes_remaining = 0;
4227 }
4228
4229 return ARCHIVE_OK;
4230 }
4231
rar5_seek_data(struct archive_read * a,int64_t offset,int whence)4232 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset,
4233 int whence)
4234 {
4235 (void) a;
4236 (void) offset;
4237 (void) whence;
4238
4239 /* We're a streaming unpacker, and we don't support seeking. */
4240
4241 return ARCHIVE_FATAL;
4242 }
4243
rar5_cleanup(struct archive_read * a)4244 static int rar5_cleanup(struct archive_read *a) {
4245 struct rar5* rar = get_context(a);
4246
4247 free(rar->cstate.window_buf);
4248 free(rar->cstate.filtered_buf);
4249 clear_data_ready_stack(rar);
4250
4251 free(rar->vol.push_buf);
4252
4253 free_filters(rar);
4254 cdeque_free(&rar->cstate.filters);
4255
4256 free(rar);
4257 a->format->data = NULL;
4258
4259 return ARCHIVE_OK;
4260 }
4261
rar5_capabilities(struct archive_read * a)4262 static int rar5_capabilities(struct archive_read * a) {
4263 (void) a;
4264 return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA
4265 | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA);
4266 }
4267
rar5_has_encrypted_entries(struct archive_read * _a)4268 static int rar5_has_encrypted_entries(struct archive_read *_a) {
4269 if (_a && _a->format) {
4270 struct rar5 *rar = (struct rar5 *)_a->format->data;
4271 if (rar) {
4272 return rar->has_encrypted_entries;
4273 }
4274 }
4275
4276 return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
4277 }
4278
rar5_init(struct rar5 * rar)4279 static int rar5_init(struct rar5* rar) {
4280 memset(rar, 0, sizeof(struct rar5));
4281
4282 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192))
4283 return ARCHIVE_FATAL;
4284
4285 /*
4286 * Until enough data has been read, we cannot tell about
4287 * any encrypted entries yet.
4288 */
4289 rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
4290
4291 return ARCHIVE_OK;
4292 }
4293
archive_read_support_format_rar5(struct archive * _a)4294 int archive_read_support_format_rar5(struct archive *_a) {
4295 struct archive_read* ar;
4296 int ret;
4297 struct rar5* rar;
4298
4299 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar)))
4300 return ret;
4301
4302 rar = malloc(sizeof(*rar));
4303 if(rar == NULL) {
4304 archive_set_error(&ar->archive, ENOMEM,
4305 "Can't allocate rar5 data");
4306 return ARCHIVE_FATAL;
4307 }
4308
4309 if(ARCHIVE_OK != rar5_init(rar)) {
4310 archive_set_error(&ar->archive, ENOMEM,
4311 "Can't allocate rar5 filter buffer");
4312 free(rar);
4313 return ARCHIVE_FATAL;
4314 }
4315
4316 ret = __archive_read_register_format(ar,
4317 rar,
4318 "rar5",
4319 rar5_bid,
4320 rar5_options,
4321 rar5_read_header,
4322 rar5_read_data,
4323 rar5_read_data_skip,
4324 rar5_seek_data,
4325 rar5_cleanup,
4326 rar5_capabilities,
4327 rar5_has_encrypted_entries);
4328
4329 if(ret != ARCHIVE_OK) {
4330 (void) rar5_cleanup(ar);
4331 }
4332
4333 return ret;
4334 }
4335