// SPDX-License-Identifier: 0BSD

/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <https://7-zip.org/>
 */

#include "xz_private.h"

/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
#ifdef XZ_DEC_BCJ

struct xz_dec_bcj {
        /* Type of the BCJ filter being used */
        enum {
                BCJ_X86 = 4,        /* x86 or x86-64 */
                BCJ_POWERPC = 5,    /* Big endian only */
                BCJ_ARM = 7,        /* Little endian only */
                BCJ_ARMTHUMB = 8,   /* Little endian only */
                BCJ_SPARC = 9,      /* Big or little endian */
                BCJ_ARM64 = 10,     /* AArch64 */
                BCJ_RISCV = 11      /* RV32GQC_Zfh, RV64GQC_Zfh */
        } type;

        /*
         * Return value of the next filter in the chain. We need to preserve
         * this information across calls, because we must not call the next
         * filter anymore once it has returned XZ_STREAM_END.
         */
        enum xz_ret ret;

        /* True if we are operating in single-call mode. */
        bool single_call;

        /*
         * Absolute position relative to the beginning of the uncompressed
         * data (in a single .xz Block). We care only about the lowest 32
         * bits so this doesn't need to be uint64_t even with big files.
         */
        uint32_t pos;

        /* x86 filter state */
        uint32_t x86_prev_mask;

        /* Temporary space to hold the variables from struct xz_buf */
        uint8_t *out;
        size_t out_pos;
        size_t out_size;

        struct {
                /* Amount of already filtered data in the beginning of buf */
                size_t filtered;

                /* Total amount of data currently stored in buf */
                size_t size;

                /*
                 * Buffer to hold a mix of filtered and unfiltered data. This
                 * needs to be big enough to hold Alignment + 2 * Look-ahead:
                 *
                 * Type         Alignment   Look-ahead
                 * x86              1           4
                 * PowerPC          4           0
                 * IA-64           16           0
                 * ARM              4           0
                 * ARM-Thumb        2           2
                 * SPARC            4           0
                 */
                uint8_t buf[16];
        } temp;
};

#ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
        return b == 0x00 || b == 0xFF;
}

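/*
 * Filter x86 CALL rel32 (0xE8) and JMP rel32 (0xE9) instructions: the
 * (buf[i] & 0xFE) != 0xE8 test below matches both opcodes. The encoder
 * converted the 32-bit little endian displacement that follows the opcode
 * into an absolute address; this decoder reverses that by subtracting the
 * position of the end of the instruction.
 *
 * Illustrative example with made-up offsets: if an 0xE8 byte sits at
 * absolute uncompressed offset 0x100 (s->pos + i == 0x100) and the stored
 * value is src == 0x305, then dest = 0x305 - (0x100 + 5) = 0x200, which
 * restores the original instruction E8 00 02 00 00.
 */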
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
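        /*
         * prev_mask tracks, in its low three bits, which of the most
         * recently seen 0xE8/0xE9 bytes were left unconverted. A sketch of
         * the table semantics as this decoder uses them:
         *
         *   - mask_to_allowed_status[m] is true iff m has at most one bit
         *     set; with two or more nearby unconverted opcode bytes, the
         *     current candidate is rejected as well.
         *
         *   - mask_to_bit_num[m] is the 1-based index of the highest set
         *     bit of m, used to pick which displacement byte to test.
         */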
        static const bool mask_to_allowed_status[8]
                = { true, true, true, false, true, false, false, false };

        static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

        size_t i;
        size_t prev_pos = (size_t)-1;
        uint32_t prev_mask = s->x86_prev_mask;
        uint32_t src;
        uint32_t dest;
        uint32_t j;
        uint8_t b;

        if (size <= 4)
                return 0;

        size -= 4;
        for (i = 0; i < size; ++i) {
                if ((buf[i] & 0xFE) != 0xE8)
                        continue;

                prev_pos = i - prev_pos;
                if (prev_pos > 3) {
                        prev_mask = 0;
                } else {
                        prev_mask = (prev_mask << (prev_pos - 1)) & 7;
                        if (prev_mask != 0) {
                                b = buf[i + 4 - mask_to_bit_num[prev_mask]];
                                if (!mask_to_allowed_status[prev_mask]
                                                || bcj_x86_test_msbyte(b)) {
                                        prev_pos = i;
                                        prev_mask = (prev_mask << 1) | 1;
                                        continue;
                                }
                        }
                }

                prev_pos = i;

                if (bcj_x86_test_msbyte(buf[i + 4])) {
                        src = get_unaligned_le32(buf + i + 1);
                        while (true) {
                                dest = src - (s->pos + (uint32_t)i + 5);
                                if (prev_mask == 0)
                                        break;

                                j = mask_to_bit_num[prev_mask] * 8;
                                b = (uint8_t)(dest >> (24 - j));
                                if (!bcj_x86_test_msbyte(b))
                                        break;

                                src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
                        }

                        dest &= 0x01FFFFFF;
                        dest |= (uint32_t)0 - (dest & 0x01000000);
                        put_unaligned_le32(dest, buf + i + 1);
                        i += 4;
                } else {
                        prev_mask = (prev_mask << 1) | 1;
                }
        }

        prev_pos = i - prev_pos;
        s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
        return i;
}
#endif

#ifdef XZ_DEC_POWERPC
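/*
 * Filter the PowerPC "bl" instruction (I-form branch: primary opcode 18
 * with AA = 0 and LK = 1, hence the 0x48000001 pattern under the mask
 * 0xFC000003). The 24-bit LI field holds a word-aligned byte displacement
 * in bits 2-25; the decoder turns the absolute target stored by the
 * encoder back into a PC-relative one by subtracting the instruction's
 * position.
 */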
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t instr;

        size &= ~(size_t)3;

        for (i = 0; i < size; i += 4) {
                instr = get_unaligned_be32(buf + i);
                if ((instr & 0xFC000003) == 0x48000001) {
                        instr &= 0x03FFFFFC;
                        instr -= s->pos + (uint32_t)i;
                        instr &= 0x03FFFFFC;
                        instr |= 0x48000001;
                        put_unaligned_be32(instr, buf + i);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_ARM
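/*
 * Filter the ARM BL instruction. In a little endian code stream the
 * fourth byte of the word is 0xEB for BL with the AL condition (cond
 * 0b1110, opcode 0b101, L = 1). The low 24 bits hold a signed word
 * offset relative to the current instruction plus 8, the value the PC
 * reads as in the ARM state, hence the "+ 8" below.
 */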
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t addr;

        size &= ~(size_t)3;

        for (i = 0; i < size; i += 4) {
                if (buf[i + 3] == 0xEB) {
                        addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
                                        | ((uint32_t)buf[i + 2] << 16);
                        addr <<= 2;
                        addr -= s->pos + (uint32_t)i + 8;
                        addr >>= 2;
                        buf[i] = (uint8_t)addr;
                        buf[i + 1] = (uint8_t)(addr >> 8);
                        buf[i + 2] = (uint8_t)(addr >> 16);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_ARMTHUMB
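/*
 * Filter the Thumb BL instruction pair: two 16-bit halfwords, the first
 * in 0xF000-0xF7FF and the second in 0xF800-0xFFFF, each carrying 11
 * bits of a halfword-aligned offset relative to the current instruction
 * plus 4. The halfwords are little endian, which is why the high bytes
 * tested below are buf[i + 1] and buf[i + 3].
 */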
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t addr;

        if (size < 4)
                return 0;

        size -= 4;

        for (i = 0; i <= size; i += 2) {
                if ((buf[i + 1] & 0xF8) == 0xF0
                                && (buf[i + 3] & 0xF8) == 0xF8) {
                        addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
                                        | ((uint32_t)buf[i] << 11)
                                        | (((uint32_t)buf[i + 3] & 0x07) << 8)
                                        | (uint32_t)buf[i + 2];
                        addr <<= 1;
                        addr -= s->pos + (uint32_t)i + 4;
                        addr >>= 1;
                        buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
                        buf[i] = (uint8_t)(addr >> 11);
                        buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
                        buf[i + 2] = (uint8_t)addr;
                        i += 2;
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_SPARC
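/*
 * Filter the SPARC CALL instruction: op = 01 in the top two bits,
 * followed by a 30-bit word displacement. Only calls whose top bits are
 * all zeros ((instr >> 22) == 0x100) or all ones ((instr >> 22) == 0x1FF)
 * are touched, i.e. targets close enough that the displacement fits in
 * 22 bits; the last assignment sign-extends bit 22 back through the
 * 30-bit field and restores the opcode.
 */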
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t instr;

        size &= ~(size_t)3;

        for (i = 0; i < size; i += 4) {
                instr = get_unaligned_be32(buf + i);
                if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
                        instr <<= 2;
                        instr -= s->pos + (uint32_t)i;
                        instr >>= 2;
                        instr = ((uint32_t)0x40000000 - (instr & 0x400000))
                                        | 0x40000000 | (instr & 0x3FFFFF);
                        put_unaligned_be32(instr, buf + i);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_ARM64
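/*
 * Filter two AArch64 instructions: BL (top six bits 0x25, with a 26-bit
 * signed word offset) and ADRP (matched via the 0x9F000000 mask; its
 * 21-bit page offset is split into immlo in bits 29-30 and immhi in
 * bits 5-23). Per the range check below, only ADRP targets within
 * +/-512 MiB are converted, which, as in upstream xz, limits false
 * matches on words that merely resemble ADRP.
 */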
static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t instr;
        uint32_t addr;

        size &= ~(size_t)3;

        for (i = 0; i < size; i += 4) {
                instr = get_unaligned_le32(buf + i);

                if ((instr >> 26) == 0x25) {
                        /* BL instruction */
                        addr = instr - ((s->pos + (uint32_t)i) >> 2);
                        instr = 0x94000000 | (addr & 0x03FFFFFF);
                        put_unaligned_le32(instr, buf + i);

                } else if ((instr & 0x9F000000) == 0x90000000) {
                        /* ADRP instruction */
                        addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC);

                        /* Only convert values in the range +/-512 MiB. */
                        if ((addr + 0x020000) & 0x1C0000)
                                continue;

                        addr -= (s->pos + (uint32_t)i) >> 12;

                        instr &= 0x9000001F;
                        instr |= (addr & 3) << 29;
                        instr |= (addr & 0x03FFFC) << 3;
                        instr |= (0U - (addr & 0x020000)) & 0xE00000;

                        put_unaligned_le32(instr, buf + i);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_RISCV
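/*
 * Filter RISC-V JAL and AUIPC+JALR pairs. JAL is touched only when rd
 * is x1 or x5, the usual link registers: with the low byte fixed to
 * 0xEF, the (b1 & 0x0D) test leaves exactly those two rd values. The
 * AUIPC cases implement, very roughly, an invertible swap between real
 * AUIPC+JALR pairs (whose rd is never x0 or x2 in practice) and the
 * otherwise unused AUIPC-with-rd-x0/x2 encodings carrying an absolute
 * target; the two branches below undo the two directions of that swap.
 * This is only a summary; the full design rationale is in riscv.c of
 * upstream xz.
 */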
static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t b1;
        uint32_t b2;
        uint32_t b3;
        uint32_t instr;
        uint32_t instr2;
        uint32_t instr2_rs1;
        uint32_t addr;

        if (size < 8)
                return 0;

        size -= 8;

        for (i = 0; i <= size; i += 2) {
                instr = buf[i];

                if (instr == 0xEF) {
                        /* JAL */
                        b1 = buf[i + 1];
                        if ((b1 & 0x0D) != 0)
                                continue;

                        b2 = buf[i + 2];
                        b3 = buf[i + 3];

                        addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1);
                        addr -= s->pos + (uint32_t)i;

                        buf[i + 1] = (uint8_t)((b1 & 0x0F)
                                        | ((addr >> 8) & 0xF0));

                        buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F)
                                        | ((addr >> 7) & 0x10)
                                        | ((addr << 4) & 0xE0));

                        buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F)
                                        | ((addr >> 13) & 0x80));

                        i += 4 - 2;

                } else if ((instr & 0x7F) == 0x17) {
                        /* AUIPC */
                        instr |= (uint32_t)buf[i + 1] << 8;
                        instr |= (uint32_t)buf[i + 2] << 16;
                        instr |= (uint32_t)buf[i + 3] << 24;

                        if (instr & 0xE80) {
                                /* AUIPC's rd doesn't equal x0 or x2. */
                                instr2 = get_unaligned_le32(buf + i + 4);

                                if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) {
                                        i += 6 - 2;
                                        continue;
                                }

                                addr = (instr & 0xFFFFF000) + (instr2 >> 20);

                                instr = 0x17 | (2 << 7) | (instr2 << 12);
                                instr2 = addr;
                        } else {
                                /* AUIPC's rd equals x0 or x2. */
                                instr2_rs1 = instr >> 27;

                                if ((uint32_t)((instr - 0x3117) << 18)
                                                >= (instr2_rs1 & 0x1D)) {
                                        i += 4 - 2;
                                        continue;
                                }

                                addr = get_unaligned_be32(buf + i + 4);
                                addr -= s->pos + (uint32_t)i;

                                instr2 = (instr >> 12) | (addr << 20);

                                instr = 0x17 | (instr2_rs1 << 7)
                                                | ((addr + 0x800) & 0xFFFFF000);
                        }

                        put_unaligned_le32(instr, buf + i);
                        put_unaligned_le32(instr2, buf + i + 4);

                        i += 8 - 2;
                }
        }

        return i;
}
#endif

/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s,
                      uint8_t *buf, size_t *pos, size_t size)
{
        size_t filtered;

        buf += *pos;
        size -= *pos;

        switch (s->type) {
#ifdef XZ_DEC_X86
        case BCJ_X86:
                filtered = bcj_x86(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_POWERPC
        case BCJ_POWERPC:
                filtered = bcj_powerpc(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_ARM
        case BCJ_ARM:
                filtered = bcj_arm(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_ARMTHUMB
        case BCJ_ARMTHUMB:
                filtered = bcj_armthumb(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_SPARC
        case BCJ_SPARC:
                filtered = bcj_sparc(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_ARM64
        case BCJ_ARM64:
                filtered = bcj_arm64(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_RISCV
        case BCJ_RISCV:
                filtered = bcj_riscv(s, buf, size);
                break;
#endif
        default:
                /* Never reached but silence compiler warnings. */
                filtered = 0;
                break;
        }

        *pos += filtered;
        s->pos += filtered;
}

/*
 * Flush pending filtered data from temp to the output buffer.
 * Move the remaining mixture of possibly filtered and unfiltered
 * data to the beginning of temp.
 */
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
        size_t copy_size;

        copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
        memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
        b->out_pos += copy_size;

        s->temp.filtered -= copy_size;
        s->temp.size -= copy_size;
        memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}

/*
 * The BCJ filter functions are primitive in the sense that they process the
 * data in chunks of 1-16 bytes. To hide this limitation, this function does
 * some buffering.
 */
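/*
 * A sketch of the flow with made-up sizes: suppose temp holds 6 bytes of
 * which 2 are already filtered. The 2 filtered bytes are flushed to b->out
 * first. If the caller's buffer still has room, the 4 unfiltered bytes are
 * copied to b->out, more data is decoded after them, and bcj_apply() runs
 * on the new data; whatever the filter could not yet process (it may need
 * up to 4 bytes of look-ahead, see the temp.buf table above) goes back to
 * temp to wait for the next call.
 */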
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
                           struct xz_buf *b)
{
        size_t out_start;

        /*
         * Flush pending already filtered data to the output buffer. Return
         * immediately if we couldn't flush everything, or if the next
         * filter in the chain had already returned XZ_STREAM_END.
         */
        if (s->temp.filtered > 0) {
                bcj_flush(s, b);
                if (s->temp.filtered > 0)
                        return XZ_OK;

                if (s->ret == XZ_STREAM_END)
                        return XZ_STREAM_END;
        }

        /*
         * If we have more output space than what is currently pending in
         * temp, copy the unfiltered data from temp to the output buffer
         * and try to fill the output buffer by decoding more data from the
         * next filter in the chain. Apply the BCJ filter on the new data
         * in the output buffer. If everything cannot be filtered, copy it
         * to temp and rewind the output buffer position accordingly.
         *
         * This must always be run when temp.size == 0 to handle a special
         * case where the output buffer is full and the next filter has no
         * more output coming but hasn't returned XZ_STREAM_END yet.
         */
        if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
                out_start = b->out_pos;
                memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
                b->out_pos += s->temp.size;

                s->ret = xz_dec_lzma2_run(lzma2, b);
                if (s->ret != XZ_STREAM_END
                                && (s->ret != XZ_OK || s->single_call))
                        return s->ret;

                bcj_apply(s, b->out, &out_start, b->out_pos);

                /*
                 * As an exception, if the next filter returned XZ_STREAM_END,
                 * we can do that too, since the last few bytes that remain
                 * unfiltered are meant to remain unfiltered.
                 */
                if (s->ret == XZ_STREAM_END)
                        return XZ_STREAM_END;

                s->temp.size = b->out_pos - out_start;
                b->out_pos -= s->temp.size;
                memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

                /*
                 * If there wasn't enough input to the next filter to fill
                 * the output buffer with unfiltered data, there's no point
                 * in trying to decode more data into temp.
                 */
                if (b->out_pos + s->temp.size < b->out_size)
                        return XZ_OK;
        }

        /*
         * We have unfiltered data in temp. If the output buffer isn't full
         * yet, try to fill the temp buffer by decoding more data from the
         * next filter. Apply the BCJ filter on temp. Then we hopefully can
         * fill the actual output buffer by copying filtered data from temp.
         * A mix of filtered and unfiltered data may be left in temp; it will
         * be taken care of on the next call to this function.
         */
        if (b->out_pos < b->out_size) {
                /* Make b->out{,_pos,_size} temporarily point to s->temp. */
                s->out = b->out;
                s->out_pos = b->out_pos;
                s->out_size = b->out_size;
                b->out = s->temp.buf;
                b->out_pos = s->temp.size;
                b->out_size = sizeof(s->temp.buf);

                s->ret = xz_dec_lzma2_run(lzma2, b);

                s->temp.size = b->out_pos;
                b->out = s->out;
                b->out_pos = s->out_pos;
                b->out_size = s->out_size;

                if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
                        return s->ret;

                bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

                /*
                 * If the next filter returned XZ_STREAM_END, we mark that
                 * everything is filtered, since the last unfiltered bytes
                 * of the stream are meant to be left as is.
                 */
                if (s->ret == XZ_STREAM_END)
                        s->temp.filtered = s->temp.size;

                bcj_flush(s, b);
                if (s->temp.filtered > 0)
                        return XZ_OK;
        }

        return s->ret;
}

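/*
 * Typical call sequence, as driven by the .xz stream decoder in
 * xz_dec_stream.c: allocate once with xz_dec_bcj_create(), call
 * xz_dec_bcj_reset() with the Filter ID parsed from the Block Header,
 * then feed data through xz_dec_bcj_run() until it returns
 * XZ_STREAM_END.
 */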
struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
{
        struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (s != NULL)
                s->single_call = single_call;

        return s;
}

enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
        switch (id) {
#ifdef XZ_DEC_X86
        case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
        case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_ARM
        case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
        case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
        case BCJ_SPARC:
#endif
#ifdef XZ_DEC_ARM64
        case BCJ_ARM64:
#endif
#ifdef XZ_DEC_RISCV
        case BCJ_RISCV:
#endif
                break;

        default:
                /* Unsupported Filter ID */
                return XZ_OPTIONS_ERROR;
        }

        s->type = id;
        s->ret = XZ_OK;
        s->pos = 0;
        s->x86_prev_mask = 0;
        s->temp.filtered = 0;
        s->temp.size = 0;

        return XZ_OK;
}

#endif