1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file string_conversion.c 6 /// \brief Conversion of strings to filter chain and vice versa 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "filter_common.h" 13 14 15 ///////////////////// 16 // String building // 17 ///////////////////// 18 19 /// How much memory to allocate for strings. For now, no realloc is used 20 /// so this needs to be big enough even though there of course is 21 /// an overflow check still. 22 /// 23 /// FIXME? Using a fixed size is wasteful if the application doesn't free 24 /// the string fairly quickly but this can be improved later if needed. 25 #define STR_ALLOC_SIZE 800 26 27 28 typedef struct { 29 char *buf; 30 size_t pos; 31 } lzma_str; 32 33 34 static lzma_ret 35 str_init(lzma_str *str, const lzma_allocator *allocator) 36 { 37 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); 38 if (str->buf == NULL) 39 return LZMA_MEM_ERROR; 40 41 str->pos = 0; 42 return LZMA_OK; 43 } 44 45 46 static void 47 str_free(lzma_str *str, const lzma_allocator *allocator) 48 { 49 lzma_free(str->buf, allocator); 50 return; 51 } 52 53 54 static bool 55 str_is_full(const lzma_str *str) 56 { 57 return str->pos == STR_ALLOC_SIZE - 1; 58 } 59 60 61 static lzma_ret 62 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) 63 { 64 if (str_is_full(str)) { 65 // The preallocated buffer was too small. 66 // This shouldn't happen as STR_ALLOC_SIZE should 67 // be adjusted if new filters are added. 
68 lzma_free(str->buf, allocator); 69 *dest = NULL; 70 assert(0); 71 return LZMA_PROG_ERROR; 72 } 73 74 str->buf[str->pos] = '\0'; 75 *dest = str->buf; 76 return LZMA_OK; 77 } 78 79 80 static void 81 str_append_str(lzma_str *str, const char *s) 82 { 83 const size_t len = strlen(s); 84 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; 85 const size_t copy_size = my_min(len, limit); 86 87 memcpy(str->buf + str->pos, s, copy_size); 88 str->pos += copy_size; 89 return; 90 } 91 92 93 static void 94 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) 95 { 96 if (v == 0) { 97 str_append_str(str, "0"); 98 } else { 99 // NOTE: Don't use plain "B" because xz and the parser in this 100 // file don't support it and at glance it may look like 8 101 // (there cannot be a space before the suffix). 102 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; 103 104 size_t suf = 0; 105 if (use_byte_suffix) { 106 while ((v & 1023) == 0 107 && suf < ARRAY_SIZE(suffixes) - 1) { 108 v >>= 10; 109 ++suf; 110 } 111 } 112 113 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember 114 // that initializing to "" initializes all elements to 115 // zero so '\0'-termination gets handled by this. 116 char buf[16] = ""; 117 size_t pos = sizeof(buf) - 1; 118 119 do { 120 buf[--pos] = '0' + (v % 10); 121 v /= 10; 122 } while (v != 0); 123 124 str_append_str(str, buf + pos); 125 str_append_str(str, suffixes[suf]); 126 } 127 128 return; 129 } 130 131 132 ////////////////////////////////////////////// 133 // Parsing and stringification declarations // 134 ////////////////////////////////////////////// 135 136 /// Maximum length for filter and option names. 137 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes 138 #define NAME_LEN_MAX 11 139 140 141 /// For option_map.flags: Use .u.map to do convert the input value 142 /// to an integer. Without this flag, .u.range.{min,max} are used 143 /// as the allowed range for the integer. 
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
/// the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04

/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
	// The option is read and written as uint32_t.
	OPTMAP_TYPE_UINT32,

	// The option is read and written as lzma_mode.
	OPTMAP_TYPE_LZMA_MODE,

	// The option is read and written as lzma_match_finder.
	OPTMAP_TYPE_LZMA_MATCH_FINDER,

	// LZMA1/2 preset: parsed by set_lzma12_preset() and
	// skipped when stringifying.
	OPTMAP_TYPE_LZMA_PRESET,
};


/// This is for mapping string values in options to integers.
/// The last element of an array must have "" as the name.
/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	// String form of the value, '\0'-terminated,
	// at most NAME_LEN_MAX characters
	const char name[NAME_LEN_MAX + 1];

	// Integer that the name maps to
	const uint32_t value;
} name_value_map;


/// Each filter that has options needs an array of option_map structures.
/// The array doesn't need to be terminated as the functions take the
/// length of the array as an argument.
///
/// When converting a string to filter options structure, option values
/// will be handled in a few different ways:
///
/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
///     is handled specially.
///
/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
///     converted to an integer using the name_value_map pointed by .u.map.
///     The last element in .u.map must have .name = "" as the terminator.
///
/// (3) Otherwise the string is treated as a non-negative unsigned decimal
///     integer which must be in the range set in .u.range.
///     If .flags has
///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
///
/// The integer value from (2) or (3) is then stored to filter_options
/// at the offset specified in .offset using the type specified in .type
/// (default is uint32_t).
///
/// Stringifying a filter is done by processing a given number of options
/// in order from the beginning of an option_map array. The integer is
/// read from filter_options at .offset using the type from .type.
///
/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
/// option is skipped.
///
/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
/// to convert the option to a string. If the map doesn't contain a string
/// for the integer value then "UNKNOWN" is used.
///
/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
/// MiB, or GiB suffix is used if the value is an exact multiple of these.
/// Plain "B" suffix is never used.
typedef struct {
	// Option name, '\0'-terminated, at most NAME_LEN_MAX characters
	char name[NAME_LEN_MAX + 1];

	// One of the OPTMAP_TYPE_* values
	uint8_t type;

	// Bitwise-or of the OPTMAP_* flags
	uint8_t flags;

	// Byte offset of the option inside the filter-specific
	// options structure (set with offsetof() in the initializers)
	uint16_t offset;

	union {
		// NVHPC has problems with unions that contain pointers that
		// are not the first members, so keep "map" at the top.
		const name_value_map *map;

		struct {
			uint32_t min;
			uint32_t max;
		} range;
	} u;
} option_map;


static const char *parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size);


/////////
// BCJ //
/////////

#if defined(HAVE_ENCODER_X86) \
		|| defined(HAVE_DECODER_X86) \
		|| defined(HAVE_ENCODER_ARM) \
		|| defined(HAVE_DECODER_ARM) \
		|| defined(HAVE_ENCODER_ARMTHUMB) \
		|| defined(HAVE_DECODER_ARMTHUMB) \
		|| defined(HAVE_ENCODER_ARM64) \
		|| defined(HAVE_DECODER_ARM64) \
		|| defined(HAVE_ENCODER_POWERPC) \
		|| defined(HAVE_DECODER_POWERPC) \
		|| defined(HAVE_ENCODER_IA64) \
		|| defined(HAVE_DECODER_IA64) \
		|| defined(HAVE_ENCODER_SPARC) \
		|| defined(HAVE_DECODER_SPARC) \
		|| defined(HAVE_ENCODER_RISCV) \
		|| defined(HAVE_DECODER_RISCV)
/// Options shared by all BCJ filters.
static const option_map bcj_optmap[] = {
	{
		.name = "start",
		.flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
		.offset = offsetof(lzma_options_bcj, start_offset),
		.u.range.min = 0,
		.u.range.max = UINT32_MAX,
	}
};


/// Parses the options of a BCJ filter into filter_options
/// using bcj_optmap.
static const char *
parse_bcj(const char **const str, const char *str_end, void *filter_options)
{
	// filter_options was zeroed on allocation and that is enough
	// for the default value.
	return parse_options(str, str_end, filter_options,
			bcj_optmap, ARRAY_SIZE(bcj_optmap));
}
#endif


///////////
// Delta //
///////////

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
/// Options of the Delta filter.
static const option_map delta_optmap[] = {
	{
		.name = "dist",
		.offset = offsetof(lzma_options_delta, dist),
		.u.range.min = LZMA_DELTA_DIST_MIN,
		.u.range.max = LZMA_DELTA_DIST_MAX,
	}
};


/// Sets the Delta filter defaults (type = LZMA_DELTA_TYPE_BYTE and
/// dist = LZMA_DELTA_DIST_MIN) and then parses the options using
/// delta_optmap.
static const char *
parse_delta(const char **const str, const char *str_end, void *filter_options)
{
	lzma_options_delta *opts = filter_options;
	opts->type = LZMA_DELTA_TYPE_BYTE;
	opts->dist = LZMA_DELTA_DIST_MIN;

	return parse_options(str, str_end, filter_options,
			delta_optmap, ARRAY_SIZE(delta_optmap));
}
#endif


///////////////////
// LZMA1 & LZMA2 //
///////////////////

/// Help string for presets
#define LZMA12_PRESET_STR "0-9[e]"


/// Converts a preset string to an integer that is stored to *preset.
///
/// The string must not be empty (*str < str_end). The first character
/// is turned into the preset level by subtracting '0'; it isn't checked
/// here that it is a digit. The callers in this file pass the result
/// to lzma_lzma_preset() and report "Unsupported preset" if that function
/// returns true. The remaining characters are flags; only 'e'
/// (LZMA_PRESET_EXTREME) is recognized.
///
/// On success NULL is returned and *str equals str_end. On error an
/// error message is returned and *str points to the unsupported flag.
static const char *
parse_lzma12_preset(const char **const str, const char *str_end,
		uint32_t *preset)
{
	assert(*str < str_end);
	*preset = (uint32_t)(**str - '0');

	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
	while (++*str < str_end) {
		switch (**str) {
		case 'e':
			*preset |= LZMA_PRESET_EXTREME;
			break;

		default:
			return "Unsupported preset flag";
		}
	}

	return NULL;
}


/// Parses a preset string with parse_lzma12_preset() and initializes
/// the lzma_options_lzma pointed by filter_options with
/// lzma_lzma_preset(). Returns NULL on success and an error message
/// otherwise.
static const char *
set_lzma12_preset(const char **const str, const char *str_end,
		void *filter_options)
{
	uint32_t preset;
	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
	if (errmsg != NULL)
		return errmsg;

	lzma_options_lzma *opts = filter_options;
	if (lzma_lzma_preset(opts, preset))
		return "Unsupported preset";

	return NULL;
}


/// Names of the values of lzma_options_lzma.mode
static const name_value_map lzma12_mode_map[] = {
	{ "fast",   LZMA_MODE_FAST },
	{ "normal", LZMA_MODE_NORMAL },
	{ "",       0 }
};


/// Names of the values of lzma_options_lzma.mf
static const name_value_map lzma12_mf_map[] = {
	{ "hc3", LZMA_MF_HC3 },
	{ "hc4", LZMA_MF_HC4 },
	{ "bt2", LZMA_MF_BT2 },
	{ "bt3", LZMA_MF_BT3 },
	{ "bt4", LZMA_MF_BT4 },
	{ "",    0 }
};


/// Options of LZMA1 and LZMA2. The order matters when stringifying
/// because filter_name_map tells how many options from the beginning
/// of this array are included in the string.
static const option_map lzma12_optmap[] = {
	{
		.name = "preset",
		.type = OPTMAP_TYPE_LZMA_PRESET,
	}, {
		.name = "dict",
		.flags = OPTMAP_USE_BYTE_SUFFIX,
		.offset = offsetof(lzma_options_lzma, dict_size),
		.u.range.min = LZMA_DICT_SIZE_MIN,
		// FIXME? The max is really max for encoding but decoding
		// would allow 4 GiB - 1 B.
		.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
	}, {
		.name = "lc",
		.offset = offsetof(lzma_options_lzma, lc),
		.u.range.min = LZMA_LCLP_MIN,
		.u.range.max = LZMA_LCLP_MAX,
	}, {
		.name = "lp",
		.offset = offsetof(lzma_options_lzma, lp),
		.u.range.min = LZMA_LCLP_MIN,
		.u.range.max = LZMA_LCLP_MAX,
	}, {
		.name = "pb",
		.offset = offsetof(lzma_options_lzma, pb),
		.u.range.min = LZMA_PB_MIN,
		.u.range.max = LZMA_PB_MAX,
	}, {
		.name = "mode",
		.type = OPTMAP_TYPE_LZMA_MODE,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mode),
		.u.map = lzma12_mode_map,
	}, {
		.name = "nice",
		.offset = offsetof(lzma_options_lzma, nice_len),
		.u.range.min = 2,
		.u.range.max = 273,
	}, {
		.name = "mf",
		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mf),
		.u.map = lzma12_mf_map,
	}, {
		.name = "depth",
		.offset = offsetof(lzma_options_lzma, depth),
		.u.range.min = 0,
		.u.range.max = UINT32_MAX,
	}
};


/// Initializes filter_options with LZMA_PRESET_DEFAULT, parses the
/// LZMA1/2 options on top of that using lzma12_optmap, and finally
/// checks that lc + lp doesn't exceed LZMA_LCLP_MAX.
static const char *
parse_lzma12(const char **const str, const char *str_end, void *filter_options)
{
	lzma_options_lzma *opts = filter_options;

	// It cannot fail.
	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
	assert(!preset_ret);
	(void)preset_ret;

	const char *errmsg = parse_options(str, str_end, filter_options,
			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
	if (errmsg != NULL)
		return errmsg;

	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
		return "The sum of lc and lp must not exceed 4";

	return NULL;
}


/////////////////////////////////////////
// Generic parsing and stringification //
/////////////////////////////////////////

static const struct {
	/// Name of the filter
	char name[NAME_LEN_MAX + 1];

	/// For lzma_str_to_filters:
	/// Size of the filter-specific options structure.
	uint32_t opts_size;

	/// Filter ID
	lzma_vli id;

	/// For lzma_str_to_filters:
	/// Function to parse the filter-specific options. The filter_options
	/// will already have been allocated using lzma_alloc_zero().
	const char *(*parse)(const char **str, const char *str_end,
			void *filter_options);

	/// For lzma_str_from_filters:
	/// If the flag LZMA_STR_ENCODER is used then the first
	/// strfy_encoder elements of optmap are stringified.
	/// With LZMA_STR_DECODER strfy_decoder is used.
	/// Currently encoders use all options that decoders do but if
	/// that changes then this needs to be changed too, for example,
	/// add a new OPTMAP flag to skip printing some decoder-only options.
	const option_map *optmap;
	uint8_t strfy_encoder;
	uint8_t strfy_decoder;

	/// For lzma_str_from_filters:
	/// If true, lzma_filter.options is allowed to be NULL. In that case,
	/// only the filter name is printed without any options.
	bool allow_null;

} filter_name_map[] = {
#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
	{ "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
	  &parse_lzma12, lzma12_optmap, 9, 5, false },
#endif

#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
	{ "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
	  &parse_lzma12, lzma12_optmap, 9, 2, false },
#endif

#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
	{ "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
	{ "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
	{ "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
	{ "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
	{ "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
	{ "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
	{ "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
	{ "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
	{ "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
	  &parse_delta, delta_optmap, 1, 1, false },
#endif
};


/// Decodes options from a string for one filter (name1=value1,name2=value2).
/// Caller must have allocated memory for filter_options already and set
/// the initial default values. This is called from the filter-specific
/// parse_* functions.
///
/// The input string starts at *str and the address in str_end is the first
/// char that is not part of the string anymore. So no '\0' terminator is
/// used. *str is advanced every time something has been decoded successfully.
///
/// Returns NULL on success and an error message otherwise. On error *str
/// is left pointing to the part of the input that the message is about.
static const char *
parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size)
{
	while (*str < str_end && **str != '\0') {
		// Each option is of the form name=value.
		// Commas (',') separate options. Extra commas are ignored.
		// Ignoring extra commas makes it simpler if an optional
		// option is stored in a shell variable which can be empty.
		if (**str == ',') {
			++*str;
			continue;
		}

		// Find where the next name=value ends.
		const size_t str_len = (size_t)(str_end - *str);
		const char *name_eq_value_end = memchr(*str, ',', str_len);
		if (name_eq_value_end == NULL)
			name_eq_value_end = str_end;

		const char *equals_sign = memchr(*str, '=',
				(size_t)(name_eq_value_end - *str));

		// Fail if the '=' wasn't found or the option name is missing
		// (the first char is '=').
		if (equals_sign == NULL || **str == '=')
			return "Options must be 'name=value' pairs separated "
					"with commas";

		// Reject a too long option name so that the memcmp()
		// in the loop below won't read past the end of the
		// string in optmap[i].name.
		const size_t name_len = (size_t)(equals_sign - *str);
		if (name_len > NAME_LEN_MAX)
			return "Unknown option name";

		// Find the option name from optmap[].
		size_t i = 0;
		while (true) {
			if (i == optmap_size)
				return "Unknown option name";

			if (memcmp(*str, optmap[i].name, name_len) == 0
					&& optmap[i].name[name_len] == '\0')
				break;

			++i;
		}

		// The input string is good at least until the start of
		// the option value.
		*str = equals_sign + 1;

		// The code assumes that the option value isn't an empty
		// string so check it here.
		const size_t value_len = (size_t)(name_eq_value_end - *str);
		if (value_len == 0)
			return "Option value cannot be empty";

		// LZMA1/2 preset has its own parsing function.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
			const char *errmsg = set_lzma12_preset(str,
					name_eq_value_end, filter_options);
			if (errmsg != NULL)
				return errmsg;

			continue;
		}

		// It's an integer value.
		uint32_t v;
		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// The integer is picked from a string-to-integer map.
			//
			// Reject a too long value string so that the memcmp()
			// in the loop below won't read past the end of the
			// string in optmap[i].u.map[j].name.
			if (value_len > NAME_LEN_MAX)
				return "Invalid option value";

			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0')
					return "Invalid option value";

				if (memcmp(*str, map[j].name, value_len) == 0
						&& map[j].name[value_len]
							== '\0') {
					v = map[j].value;
					break;
				}

				++j;
			}
		} else if (**str < '0' || **str > '9') {
			// Note that "max" isn't supported while it is
			// supported in xz. It's not useful here.
			return "Value is not a non-negative decimal integer";
		} else {
			// strtoul() has locale-specific behavior so it cannot
			// be relied on to get reproducible results since we
			// cannot change the locale in a thread-safe library.
			// It also needs '\0'-termination.
			//
			// Use a temporary pointer so that *str will point
			// to the beginning of the value string in case
			// an error occurs.
			const char *p = *str;
			v = 0;
			do {
				// Check for overflow before multiplying
				// and before adding the next digit.
				if (v > UINT32_MAX / 10)
					return "Value out of range";

				v *= 10;

				const uint32_t add = (uint32_t)(*p - '0');
				if (UINT32_MAX - add < v)
					return "Value out of range";

				v += add;
				++p;
			} while (p < name_eq_value_end
					&& *p >= '0' && *p <= '9');

			if (p < name_eq_value_end) {
				// Remember this position so that it can be
				// used for error messages that are
				// specifically about the suffix. (Out of
				// range values are about the whole value
				// and those error messages point to the
				// beginning of the number part,
				// not to the suffix.)
				const char *multiplier_start = p;

				// If multiplier suffix shouldn't be used
				// then don't allow them even if the value
				// would stay within limits. This is a somewhat
				// unnecessary check but it rejects silly
				// things like lzma2:pb=0MiB which xz allows.
				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
						== 0) {
					*str = multiplier_start;
					return "This option does not support "
						"any integer suffixes";
				}

				uint32_t shift;

				switch (*p) {
				case 'k':
				case 'K':
					shift = 10;
					break;

				case 'm':
				case 'M':
					shift = 20;
					break;

				case 'g':
				case 'G':
					shift = 30;
					break;

				default:
					*str = multiplier_start;
					return "Invalid multiplier suffix "
						"(KiB, MiB, or GiB)";
				}

				++p;

				// Allow "M", "Mi", "MB", "MiB" and the same
				// for the other five characters from the
				// switch-statement above. All are handled
				// as base-2 (perhaps a mistake, perhaps not).
				// Note that 'i' and 'B' are case sensitive.
				if (p < name_eq_value_end && *p == 'i')
					++p;

				if (p < name_eq_value_end && *p == 'B')
					++p;

				// Now we must have no chars remaining.
				if (p < name_eq_value_end) {
					*str = multiplier_start;
					return "Invalid multiplier suffix "
						"(KiB, MiB, or GiB)";
				}

				// Make sure that the shift won't
				// overflow uint32_t.
				if (v > (UINT32_MAX >> shift))
					return "Value out of range";

				v <<= shift;
			}

			if (v < optmap[i].u.range.min
					|| v > optmap[i].u.range.max)
				return "Value out of range";
		}

		// Set the value in filter_options. Enums are handled
		// specially since the underlying type isn't the same
		// as uint32_t on all systems.
		void *ptr = (char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			*(lzma_mode *)ptr = (lzma_mode)v;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
			break;

		default:
			*(uint32_t *)ptr = v;
			break;
		}

		// This option has been successfully handled.
		*str = name_eq_value_end;
	}

	// No errors.
	return NULL;
}


/// Finds the name of the filter at the beginning of the string and
/// calls filter_name_map[i].parse() to decode the filter-specific options.
/// The caller must have set str_end so that exactly one filter and its
/// options are present without any trailing characters.
///
/// On success NULL is returned and filter->options points to memory
/// allocated with lzma_alloc_zero() using the given allocator. On error
/// an error message is returned, *filter isn't modified, and the options
/// that were allocated here have been freed.
static const char *
parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
		const lzma_allocator *allocator, bool only_xz)
{
	// Search for a colon or equals sign that would separate the filter
	// name from filter options. If neither is found, then the input
	// string only contains a filter name and there are no options.
	//
	// First assume that a colon or equals sign won't be found:
	const char *name_end = str_end;
	const char *opts_start = str_end;

	for (const char *p = *str; p < str_end; ++p) {
		if (*p == ':' || *p == '=') {
			name_end = p;

			// Filter options (name1=value1,name2=value2,...)
			// begin after the colon or equals sign.
			opts_start = p + 1;
			break;
		}
	}

	// Reject a too long filter name so that the memcmp()
	// in the loop below won't read past the end of the
	// string in filter_name_map[i].name.
	const size_t name_len = (size_t)(name_end - *str);
	if (name_len > NAME_LEN_MAX)
		return "Unknown filter name";

	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
		if (memcmp(*str, filter_name_map[i].name, name_len) == 0
				&& filter_name_map[i].name[name_len] == '\0') {
			if (only_xz && filter_name_map[i].id
					>= LZMA_FILTER_RESERVED_START)
				return "This filter cannot be used in "
						"the .xz format";

			// Allocate the filter-specific options and
			// initialize the memory with zeros.
			void *options = lzma_alloc_zero(
					filter_name_map[i].opts_size,
					allocator);
			if (options == NULL)
				return "Memory allocation failed";

			// Filter name was found so the input string is good
			// at least this far.
			*str = opts_start;

			const char *errmsg = filter_name_map[i].parse(
					str, str_end, options);
			if (errmsg != NULL) {
				lzma_free(options, allocator);
				return errmsg;
			}

			// *filter is modified only when parsing is successful.
			filter->id = filter_name_map[i].id;
			filter->options = options;
			return NULL;
		}
	}

	return "Unknown filter name";
}


/// Converts the string to a filter chain (array of lzma_filter structures).
///
/// *str is advanced every time something has been decoded successfully.
/// This way the caller knows where in the string a possible error occurred.
///
/// On success NULL is returned and at most LZMA_FILTERS_MAX filters plus
/// the terminating element (.id = LZMA_VLI_UNKNOWN) have been written to
/// filters[]. On error an error message is returned and filters[] isn't
/// modified.
static const char *
str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
		const lzma_allocator *allocator)
{
	const char *errmsg;

	// Skip leading spaces.
	while (**str == ' ')
		++*str;

	if (**str == '\0')
		return "Empty string is not allowed, "
				"try \"6\" if a default value is needed";

	// Detect the type of the string.
	//
	// A string beginning with a digit or a string beginning with
	// one dash and a digit are treated as presets. Trailing spaces
	// will be ignored too (leading spaces were already ignored above).
	//
	// For example, "6", "7  ", "-9e", or "  -3  " are treated as presets.
	// Strings like "-" or "- " aren't preset.
#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
		if (**str == '-')
			++*str;

		// Ignore trailing spaces.
		const size_t str_len = strlen(*str);
		const char *str_end = memchr(*str, ' ', str_len);
		if (str_end != NULL) {
			// There is at least one trailing space. Check that
			// there are no chars other than spaces.
			for (size_t i = 1; str_end[i] != '\0'; ++i)
				if (str_end[i] != ' ')
					return "Unsupported preset";
		} else {
			// There are no trailing spaces. Use the whole string.
			str_end = *str + str_len;
		}

		uint32_t preset;
		errmsg = parse_lzma12_preset(str, str_end, &preset);
		if (errmsg != NULL)
			return errmsg;

		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
		if (opts == NULL)
			return "Memory allocation failed";

		if (lzma_lzma_preset(opts, preset)) {
			lzma_free(opts, allocator);
			return "Unsupported preset";
		}

		// A preset is a chain that has only the LZMA2 filter.
		filters[0].id = LZMA_FILTER_LZMA2;
		filters[0].options = opts;
		filters[1].id = LZMA_VLI_UNKNOWN;
		filters[1].options = NULL;

		return NULL;
	}

	// Not a preset so it must be a filter chain.
	//
	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
	// can be used in .xz.
	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;

	// Use a temporary array so that we don't modify the caller-supplied
	// one until we know that no errors occurred.
	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];

	size_t i = 0;
	do {
		if (i == LZMA_FILTERS_MAX) {
			errmsg = "The maximum number of filters is four";
			goto error;
		}

		// Skip "--" if present.
		if ((*str)[0] == '-' && (*str)[1] == '-')
			*str += 2;

		// Locate the end of "filter:name1=value1,name2=value2",
		// stopping at the first "--" or a single space.
		const char *filter_end = *str;
		while (filter_end[0] != '\0') {
			if ((filter_end[0] == '-' && filter_end[1] == '-')
					|| filter_end[0] == ' ')
				break;

			++filter_end;
		}

		// Inputs that have "--" at the end or "-- " in the middle
		// will result in an empty filter name.
		if (filter_end == *str) {
			errmsg = "Filter name is missing";
			goto error;
		}

		errmsg = parse_filter(str, filter_end, &temp_filters[i],
				allocator, only_xz);
		if (errmsg != NULL)
			goto error;

		// Skip trailing spaces.
		while (**str == ' ')
			++*str;

		++i;
	} while (**str != '\0');

	// Seems to be good, terminate the array so that
	// basic validation can be done.
	temp_filters[i].id = LZMA_VLI_UNKNOWN;
	temp_filters[i].options = NULL;

	// Do basic validation if the application didn't prohibit it.
	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
		size_t dummy;
		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
		if (ret != LZMA_OK) {
			errmsg = "Invalid filter chain "
					"('lzma2' missing at the end?)";
			goto error;
		}
	}

	// All good. Copy the filters to the application supplied array.
	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
	return NULL;

error:
	// Free the filter options that were successfully decoded.
	while (i-- > 0)
		lzma_free(temp_filters[i].options, allocator);

	return errmsg;
}


/// Public wrapper for str_to_filters(): checks the arguments and the flags
/// and converts the position where parsing stopped to *error_pos.
///
/// Returns NULL on success and an error message otherwise. If error_pos
/// isn't NULL, *error_pos is first set to zero and, once the string has
/// been parsed, to the offset from the beginning of str where parsing
/// stopped (limited to INT_MAX).
extern LZMA_API(const char *)
lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	// If error_pos isn't NULL, *error_pos must always be set.
	// liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this
	// when str == NULL or filters == NULL or flags are unsupported.
	if (error_pos != NULL)
		*error_pos = 0;

	if (str == NULL || filters == NULL)
		return "Unexpected NULL pointer argument(s) "
				"to lzma_str_to_filters()";

	// Validate the flags.
	const uint32_t supported_flags
			= LZMA_STR_ALL_FILTERS
			| LZMA_STR_NO_VALIDATION;

	if (flags & ~supported_flags)
		return "Unsupported flags to lzma_str_to_filters()";

	const char *used = str;
	const char *errmsg = str_to_filters(&used, filters, flags, allocator);

	if (error_pos != NULL) {
		// The offset might not fit into an int so clamp it.
		const size_t n = (size_t)(used - str);
		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
	}

	return errmsg;
}


/// Converts options of one filter to a string.
///
/// The caller must have already put the filter name in the destination
/// string. Since it is possible that no options will be needed, the caller
/// won't have put a delimiter character (':' or '=') in the string yet.
/// We will add it if at least one option will be added to the string.
static void
strfy_filter(lzma_str *dest, const char *delimiter,
		const option_map *optmap, size_t optmap_count,
		const void *filter_options)
{
	for (size_t i = 0; i < optmap_count; ++i) {
		// No attempt is made to reverse LZMA1/2 preset.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
			continue;

		// All options have integer values, some just are mapped
		// to a string with a name_value_map. LZMA1/2 preset
		// isn't reversed back to preset=PRESET form.
		uint32_t v;
		const void *ptr
			= (const char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			v = *(const lzma_mode *)ptr;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			v = *(const lzma_match_finder *)ptr;
			break;

		default:
			v = *(const uint32_t *)ptr;
			break;
		}

		// Skip this if this option should be omitted from
		// the string when the value is zero.
		if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
			continue;

		// Before the first option we add whatever delimiter
		// the caller gave us. For later options a comma is used.
		str_append_str(dest, delimiter);
		delimiter = ",";

		// Add the option name and equals sign.
		str_append_str(dest, optmap[i].name);
		str_append_str(dest, "=");

		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0') {
					str_append_str(dest, "UNKNOWN");
					break;
				}

				if (map[j].value == v) {
					str_append_str(dest, map[j].name);
					break;
				}

				++j;
			}
		} else {
			str_append_u32(dest, v,
				optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
		}
	}

	return;
}


extern LZMA_API(lzma_ret)
lzma_str_from_filters(char **output_str, const lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	// On error *output_str is always set to NULL.
	// Do it as the very first step.
	if (output_str == NULL)
		return LZMA_PROG_ERROR;

	*output_str = NULL;

	if (filters == NULL)
		return LZMA_PROG_ERROR;

	// Validate the flags.
	const uint32_t supported_flags
			= LZMA_STR_ENCODER
			| LZMA_STR_DECODER
			| LZMA_STR_GETOPT_LONG
			| LZMA_STR_NO_SPACES;

	if (flags & ~supported_flags)
		return LZMA_OPTIONS_ERROR;

	// There must be at least one filter.
	if (filters[0].id == LZMA_VLI_UNKNOWN)
		return LZMA_OPTIONS_ERROR;

	// Allocate memory for the output string.
	lzma_str dest;
	return_if_error(str_init(&dest, allocator));

	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));

	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";

	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
		// If we reach LZMA_FILTERS_MAX, then the filters array
		// is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
		if (i == LZMA_FILTERS_MAX) {
			str_free(&dest, allocator);
			return LZMA_OPTIONS_ERROR;
		}

		// Don't add a space between filters if the caller
		// doesn't want them.
		if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
			str_append_str(&dest, " ");

		// Use dashes for xz getopt_long() compatible syntax but also
		// use dashes to separate filters when spaces weren't wanted.
		if ((flags & LZMA_STR_GETOPT_LONG)
				|| (i > 0 && (flags & LZMA_STR_NO_SPACES)))
			str_append_str(&dest, "--");

		size_t j = 0;
		while (true) {
			if (j == ARRAY_SIZE(filter_name_map)) {
				// Filter ID in filters[i].id isn't supported.
				str_free(&dest, allocator);
				return LZMA_OPTIONS_ERROR;
			}

			if (filter_name_map[j].id == filters[i].id) {
				// Add the filter name.
				str_append_str(&dest, filter_name_map[j].name);

				// If only the filter names were wanted then
				// skip to the next filter. In this case
				// .options is ignored and may be NULL even
				// when the filter doesn't allow NULL options.
				if (!show_opts)
					break;

				if (filters[i].options == NULL) {
					if (!filter_name_map[j].allow_null) {
						// Filter-specific options
						// are missing but with
						// this filter the options
						// structure is mandatory.
						str_free(&dest, allocator);
						return LZMA_OPTIONS_ERROR;
					}

					// .options is allowed to be NULL.
					// There is no need to add any
					// options to the string.
					break;
				}

				// Options structure is available. Add
				// the filter options to the string.
				const size_t optmap_count
					= (flags & LZMA_STR_ENCODER)
					?
filter_name_map[j].strfy_encoder 1206 : filter_name_map[j].strfy_decoder; 1207 strfy_filter(&dest, opt_delim, 1208 filter_name_map[j].optmap, 1209 optmap_count, 1210 filters[i].options); 1211 break; 1212 } 1213 1214 ++j; 1215 } 1216 } 1217 1218 return str_finish(output_str, &dest, allocator); 1219 } 1220 1221 1222 extern LZMA_API(lzma_ret) 1223 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, 1224 const lzma_allocator *allocator) 1225 { 1226 // On error *output_str is always set to NULL. 1227 // Do it as the very first step. 1228 if (output_str == NULL) 1229 return LZMA_PROG_ERROR; 1230 1231 *output_str = NULL; 1232 1233 // Validate the flags. 1234 const uint32_t supported_flags 1235 = LZMA_STR_ALL_FILTERS 1236 | LZMA_STR_ENCODER 1237 | LZMA_STR_DECODER 1238 | LZMA_STR_GETOPT_LONG; 1239 1240 if (flags & ~supported_flags) 1241 return LZMA_OPTIONS_ERROR; 1242 1243 // Allocate memory for the output string. 1244 lzma_str dest; 1245 return_if_error(str_init(&dest, allocator)); 1246 1247 // If only listing the filter names then separate them with spaces. 1248 // Otherwise use newlines. 1249 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1250 const char *filter_delim = show_opts ? "\n" : " "; 1251 1252 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1253 bool first_filter_printed = false; 1254 1255 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 1256 // If we are printing only one filter then skip others. 1257 if (filter_id != LZMA_VLI_UNKNOWN 1258 && filter_id != filter_name_map[i].id) 1259 continue; 1260 1261 // If we are printing only .xz filters then skip the others. 1262 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START 1263 && (flags & LZMA_STR_ALL_FILTERS) == 0 1264 && filter_id == LZMA_VLI_UNKNOWN) 1265 continue; 1266 1267 // Add a new line if this isn't the first filter being 1268 // written to the string. 
1269 if (first_filter_printed) 1270 str_append_str(&dest, filter_delim); 1271 1272 first_filter_printed = true; 1273 1274 if (flags & LZMA_STR_GETOPT_LONG) 1275 str_append_str(&dest, "--"); 1276 1277 str_append_str(&dest, filter_name_map[i].name); 1278 1279 // If only the filter names were wanted then continue 1280 // to the next filter. 1281 if (!show_opts) 1282 continue; 1283 1284 const option_map *optmap = filter_name_map[i].optmap; 1285 const char *d = opt_delim; 1286 1287 const size_t end = (flags & LZMA_STR_ENCODER) 1288 ? filter_name_map[i].strfy_encoder 1289 : filter_name_map[i].strfy_decoder; 1290 1291 for (size_t j = 0; j < end; ++j) { 1292 // The first option is delimited from the filter 1293 // name using "=" or ":" and the rest of the options 1294 // are separated with ",". 1295 str_append_str(&dest, d); 1296 d = ","; 1297 1298 // optname=<possible_values> 1299 str_append_str(&dest, optmap[j].name); 1300 str_append_str(&dest, "=<"); 1301 1302 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { 1303 // LZMA1/2 preset has its custom help string. 1304 str_append_str(&dest, LZMA12_PRESET_STR); 1305 } else if (optmap[j].flags 1306 & OPTMAP_USE_NAME_VALUE_MAP) { 1307 // Separate the possible option values by "|". 1308 const name_value_map *m = optmap[j].u.map; 1309 for (size_t k = 0; m[k].name[0] != '\0'; ++k) { 1310 if (k > 0) 1311 str_append_str(&dest, "|"); 1312 1313 str_append_str(&dest, m[k].name); 1314 } 1315 } else { 1316 // Integer range is shown as min-max. 1317 const bool use_byte_suffix = optmap[j].flags 1318 & OPTMAP_USE_BYTE_SUFFIX; 1319 str_append_u32(&dest, optmap[j].u.range.min, 1320 use_byte_suffix); 1321 str_append_str(&dest, "-"); 1322 str_append_u32(&dest, optmap[j].u.range.max, 1323 use_byte_suffix); 1324 } 1325 1326 str_append_str(&dest, ">"); 1327 } 1328 } 1329 1330 // If no filters were added to the string then it must be because 1331 // the caller provided an unsupported Filter ID. 
1332 if (!first_filter_printed) { 1333 str_free(&dest, allocator); 1334 return LZMA_OPTIONS_ERROR; 1335 } 1336 1337 return str_finish(output_str, &dest, allocator); 1338 } 1339