1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file string_conversion.c 6 /// \brief Conversion of strings to filter chain and vice versa 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "filter_common.h" 13 14 15 ///////////////////// 16 // String building // 17 ///////////////////// 18 19 /// How much memory to allocate for strings. For now, no realloc is used 20 /// so this needs to be big enough even though there of course is 21 /// an overflow check still. 22 /// 23 /// FIXME? Using a fixed size is wasteful if the application doesn't free 24 /// the string fairly quickly but this can be improved later if needed. 25 #define STR_ALLOC_SIZE 800 26 27 28 typedef struct { 29 char *buf; 30 size_t pos; 31 } lzma_str; 32 33 34 static lzma_ret 35 str_init(lzma_str *str, const lzma_allocator *allocator) 36 { 37 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); 38 if (str->buf == NULL) 39 return LZMA_MEM_ERROR; 40 41 str->pos = 0; 42 return LZMA_OK; 43 } 44 45 46 static void 47 str_free(lzma_str *str, const lzma_allocator *allocator) 48 { 49 lzma_free(str->buf, allocator); 50 return; 51 } 52 53 54 static bool 55 str_is_full(const lzma_str *str) 56 { 57 return str->pos == STR_ALLOC_SIZE - 1; 58 } 59 60 61 static lzma_ret 62 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) 63 { 64 if (str_is_full(str)) { 65 // The preallocated buffer was too small. 66 // This shouldn't happen as STR_ALLOC_SIZE should 67 // be adjusted if new filters are added. 
68 lzma_free(str->buf, allocator); 69 *dest = NULL; 70 assert(0); 71 return LZMA_PROG_ERROR; 72 } 73 74 str->buf[str->pos] = '\0'; 75 *dest = str->buf; 76 return LZMA_OK; 77 } 78 79 80 static void 81 str_append_str(lzma_str *str, const char *s) 82 { 83 const size_t len = strlen(s); 84 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; 85 const size_t copy_size = my_min(len, limit); 86 87 memcpy(str->buf + str->pos, s, copy_size); 88 str->pos += copy_size; 89 return; 90 } 91 92 93 static void 94 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) 95 { 96 if (v == 0) { 97 str_append_str(str, "0"); 98 } else { 99 // NOTE: Don't use plain "B" because xz and the parser in this 100 // file don't support it and at glance it may look like 8 101 // (there cannot be a space before the suffix). 102 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; 103 104 size_t suf = 0; 105 if (use_byte_suffix) { 106 while ((v & 1023) == 0 107 && suf < ARRAY_SIZE(suffixes) - 1) { 108 v >>= 10; 109 ++suf; 110 } 111 } 112 113 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember 114 // that initializing to "" initializes all elements to 115 // zero so '\0'-termination gets handled by this. 116 char buf[16] = ""; 117 size_t pos = sizeof(buf) - 1; 118 119 do { 120 buf[--pos] = '0' + (v % 10); 121 v /= 10; 122 } while (v != 0); 123 124 str_append_str(str, buf + pos); 125 str_append_str(str, suffixes[suf]); 126 } 127 128 return; 129 } 130 131 132 ////////////////////////////////////////////// 133 // Parsing and stringification declarations // 134 ////////////////////////////////////////////// 135 136 /// Maximum length for filter and option names. 137 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes 138 #define NAME_LEN_MAX 11 139 140 141 /// For option_map.flags: Use .u.map to do convert the input value 142 /// to an integer. Without this flag, .u.range.{min,max} are used 143 /// as the allowed range for the integer. 
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in the input string and use them
/// in the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04

/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
	OPTMAP_TYPE_UINT32,
	OPTMAP_TYPE_LZMA_MODE,
	OPTMAP_TYPE_LZMA_MATCH_FINDER,
	OPTMAP_TYPE_LZMA_PRESET,
};


/// This is for mapping string values in options to integers.
/// The last element of an array must have "" as the name.
/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	const char name[NAME_LEN_MAX + 1];
	const uint32_t value;
} name_value_map;


/// Each filter that has options needs an array of option_map structures.
/// The array doesn't need to be terminated as the functions take the
/// length of the array as an argument.
///
/// When converting a string to a filter options structure, option values
/// will be handled in a few different ways:
///
/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then the LZMA1/2 preset
///     string is handled specially.
///
/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
///     converted to an integer using the name_value_map pointed to by
///     .u.map. The last element in .u.map must have .name = "" as
///     the terminator.
///
/// (3) Otherwise the string is treated as a non-negative decimal
///     integer which must be in the range set in .u.range. If .flags has
///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
///
/// The integer value from (2) or (3) is then stored to filter_options
/// at the offset specified in .offset using the type specified in .type
/// (default is uint32_t).
///
/// Stringifying a filter is done by processing a given number of options
/// in order from the beginning of an option_map array. The integer is
/// read from filter_options at .offset using the type from .type.
///
/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
/// option is skipped.
///
/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
/// to convert the option to a string. If the map doesn't contain a string
/// for the integer value then "UNKNOWN" is used.
///
/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
/// MiB, or GiB suffix is used if the value is an exact multiple of these.
/// Plain "B" suffix is never used.
213 typedef struct { 214 char name[NAME_LEN_MAX + 1]; 215 uint8_t type; 216 uint8_t flags; 217 uint16_t offset; 218 219 union { 220 struct { 221 uint32_t min; 222 uint32_t max; 223 } range; 224 225 const name_value_map *map; 226 } u; 227 } option_map; 228 229 230 static const char *parse_options(const char **const str, const char *str_end, 231 void *filter_options, 232 const option_map *const optmap, const size_t optmap_size); 233 234 235 ///////// 236 // BCJ // 237 ///////// 238 239 #if defined(HAVE_ENCODER_X86) \ 240 || defined(HAVE_DECODER_X86) \ 241 || defined(HAVE_ENCODER_ARM) \ 242 || defined(HAVE_DECODER_ARM) \ 243 || defined(HAVE_ENCODER_ARMTHUMB) \ 244 || defined(HAVE_DECODER_ARMTHUMB) \ 245 || defined(HAVE_ENCODER_ARM64) \ 246 || defined(HAVE_DECODER_ARM64) \ 247 || defined(HAVE_ENCODER_POWERPC) \ 248 || defined(HAVE_DECODER_POWERPC) \ 249 || defined(HAVE_ENCODER_IA64) \ 250 || defined(HAVE_DECODER_IA64) \ 251 || defined(HAVE_ENCODER_SPARC) \ 252 || defined(HAVE_DECODER_SPARC) \ 253 || defined(HAVE_ENCODER_RISCV) \ 254 || defined(HAVE_DECODER_RISCV) 255 static const option_map bcj_optmap[] = { 256 { 257 .name = "start", 258 .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX, 259 .offset = offsetof(lzma_options_bcj, start_offset), 260 .u.range.min = 0, 261 .u.range.max = UINT32_MAX, 262 } 263 }; 264 265 266 static const char * 267 parse_bcj(const char **const str, const char *str_end, void *filter_options) 268 { 269 // filter_options was zeroed on allocation and that is enough 270 // for the default value. 
271 return parse_options(str, str_end, filter_options, 272 bcj_optmap, ARRAY_SIZE(bcj_optmap)); 273 } 274 #endif 275 276 277 /////////// 278 // Delta // 279 /////////// 280 281 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) 282 static const option_map delta_optmap[] = { 283 { 284 .name = "dist", 285 .offset = offsetof(lzma_options_delta, dist), 286 .u.range.min = LZMA_DELTA_DIST_MIN, 287 .u.range.max = LZMA_DELTA_DIST_MAX, 288 } 289 }; 290 291 292 static const char * 293 parse_delta(const char **const str, const char *str_end, void *filter_options) 294 { 295 lzma_options_delta *opts = filter_options; 296 opts->type = LZMA_DELTA_TYPE_BYTE; 297 opts->dist = LZMA_DELTA_DIST_MIN; 298 299 return parse_options(str, str_end, filter_options, 300 delta_optmap, ARRAY_SIZE(delta_optmap)); 301 } 302 #endif 303 304 305 /////////////////// 306 // LZMA1 & LZMA2 // 307 /////////////////// 308 309 /// Help string for presets 310 #define LZMA12_PRESET_STR "0-9[e]" 311 312 313 static const char * 314 parse_lzma12_preset(const char **const str, const char *str_end, 315 uint32_t *preset) 316 { 317 assert(*str < str_end); 318 *preset = (uint32_t)(**str - '0'); 319 320 // NOTE: Remember to update LZMA12_PRESET_STR if this is modified! 
321 while (++*str < str_end) { 322 switch (**str) { 323 case 'e': 324 *preset |= LZMA_PRESET_EXTREME; 325 break; 326 327 default: 328 return "Unsupported preset flag"; 329 } 330 } 331 332 return NULL; 333 } 334 335 336 static const char * 337 set_lzma12_preset(const char **const str, const char *str_end, 338 void *filter_options) 339 { 340 uint32_t preset; 341 const char *errmsg = parse_lzma12_preset(str, str_end, &preset); 342 if (errmsg != NULL) 343 return errmsg; 344 345 lzma_options_lzma *opts = filter_options; 346 if (lzma_lzma_preset(opts, preset)) 347 return "Unsupported preset"; 348 349 return NULL; 350 } 351 352 353 static const name_value_map lzma12_mode_map[] = { 354 { "fast", LZMA_MODE_FAST }, 355 { "normal", LZMA_MODE_NORMAL }, 356 { "", 0 } 357 }; 358 359 360 static const name_value_map lzma12_mf_map[] = { 361 { "hc3", LZMA_MF_HC3 }, 362 { "hc4", LZMA_MF_HC4 }, 363 { "bt2", LZMA_MF_BT2 }, 364 { "bt3", LZMA_MF_BT3 }, 365 { "bt4", LZMA_MF_BT4 }, 366 { "", 0 } 367 }; 368 369 370 static const option_map lzma12_optmap[] = { 371 { 372 .name = "preset", 373 .type = OPTMAP_TYPE_LZMA_PRESET, 374 }, { 375 .name = "dict", 376 .flags = OPTMAP_USE_BYTE_SUFFIX, 377 .offset = offsetof(lzma_options_lzma, dict_size), 378 .u.range.min = LZMA_DICT_SIZE_MIN, 379 // FIXME? The max is really max for encoding but decoding 380 // would allow 4 GiB - 1 B. 
381 .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29), 382 }, { 383 .name = "lc", 384 .offset = offsetof(lzma_options_lzma, lc), 385 .u.range.min = LZMA_LCLP_MIN, 386 .u.range.max = LZMA_LCLP_MAX, 387 }, { 388 .name = "lp", 389 .offset = offsetof(lzma_options_lzma, lp), 390 .u.range.min = LZMA_LCLP_MIN, 391 .u.range.max = LZMA_LCLP_MAX, 392 }, { 393 .name = "pb", 394 .offset = offsetof(lzma_options_lzma, pb), 395 .u.range.min = LZMA_PB_MIN, 396 .u.range.max = LZMA_PB_MAX, 397 }, { 398 .name = "mode", 399 .type = OPTMAP_TYPE_LZMA_MODE, 400 .flags = OPTMAP_USE_NAME_VALUE_MAP, 401 .offset = offsetof(lzma_options_lzma, mode), 402 .u.map = lzma12_mode_map, 403 }, { 404 .name = "nice", 405 .offset = offsetof(lzma_options_lzma, nice_len), 406 .u.range.min = 2, 407 .u.range.max = 273, 408 }, { 409 .name = "mf", 410 .type = OPTMAP_TYPE_LZMA_MATCH_FINDER, 411 .flags = OPTMAP_USE_NAME_VALUE_MAP, 412 .offset = offsetof(lzma_options_lzma, mf), 413 .u.map = lzma12_mf_map, 414 }, { 415 .name = "depth", 416 .offset = offsetof(lzma_options_lzma, depth), 417 .u.range.min = 0, 418 .u.range.max = UINT32_MAX, 419 } 420 }; 421 422 423 static const char * 424 parse_lzma12(const char **const str, const char *str_end, void *filter_options) 425 { 426 lzma_options_lzma *opts = filter_options; 427 428 // It cannot fail. 
429 const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT); 430 assert(!preset_ret); 431 (void)preset_ret; 432 433 const char *errmsg = parse_options(str, str_end, filter_options, 434 lzma12_optmap, ARRAY_SIZE(lzma12_optmap)); 435 if (errmsg != NULL) 436 return errmsg; 437 438 if (opts->lc + opts->lp > LZMA_LCLP_MAX) 439 return "The sum of lc and lp must not exceed 4"; 440 441 return NULL; 442 } 443 444 445 ///////////////////////////////////////// 446 // Generic parsing and stringification // 447 ///////////////////////////////////////// 448 449 static const struct { 450 /// Name of the filter 451 char name[NAME_LEN_MAX + 1]; 452 453 /// For lzma_str_to_filters: 454 /// Size of the filter-specific options structure. 455 uint32_t opts_size; 456 457 /// Filter ID 458 lzma_vli id; 459 460 /// For lzma_str_to_filters: 461 /// Function to parse the filter-specific options. The filter_options 462 /// will already have been allocated using lzma_alloc_zero(). 463 const char *(*parse)(const char **str, const char *str_end, 464 void *filter_options); 465 466 /// For lzma_str_from_filters: 467 /// If the flag LZMA_STR_ENCODER is used then the first 468 /// strfy_encoder elements of optmap are stringified. 469 /// With LZMA_STR_DECODER strfy_decoder is used. 470 /// Currently encoders use all options that decoders do but if 471 /// that changes then this needs to be changed too, for example, 472 /// add a new OPTMAP flag to skip printing some decoder-only options. 473 const option_map *optmap; 474 uint8_t strfy_encoder; 475 uint8_t strfy_decoder; 476 477 /// For lzma_str_from_filters: 478 /// If true, lzma_filter.options is allowed to be NULL. In that case, 479 /// only the filter name is printed without any options. 
480 bool allow_null; 481 482 } filter_name_map[] = { 483 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) 484 { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1, 485 &parse_lzma12, lzma12_optmap, 9, 5, false }, 486 #endif 487 488 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) 489 { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2, 490 &parse_lzma12, lzma12_optmap, 9, 2, false }, 491 #endif 492 493 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) 494 { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86, 495 &parse_bcj, bcj_optmap, 1, 1, true }, 496 #endif 497 498 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) 499 { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM, 500 &parse_bcj, bcj_optmap, 1, 1, true }, 501 #endif 502 503 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) 504 { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB, 505 &parse_bcj, bcj_optmap, 1, 1, true }, 506 #endif 507 508 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64) 509 { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64, 510 &parse_bcj, bcj_optmap, 1, 1, true }, 511 #endif 512 513 #if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV) 514 { "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV, 515 &parse_bcj, bcj_optmap, 1, 1, true }, 516 #endif 517 518 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) 519 { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC, 520 &parse_bcj, bcj_optmap, 1, 1, true }, 521 #endif 522 523 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) 524 { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64, 525 &parse_bcj, bcj_optmap, 1, 1, true }, 526 #endif 527 528 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) 529 { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC, 530 &parse_bcj, bcj_optmap, 1, 1, true }, 531 #endif 532 533 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) 534 { "delta", 
sizeof(lzma_options_delta), LZMA_FILTER_DELTA, 535 &parse_delta, delta_optmap, 1, 1, false }, 536 #endif 537 }; 538 539 540 /// Decodes options from a string for one filter (name1=value1,name2=value2). 541 /// Caller must have allocated memory for filter_options already and set 542 /// the initial default values. This is called from the filter-specific 543 /// parse_* functions. 544 /// 545 /// The input string starts at *str and the address in str_end is the first 546 /// char that is not part of the string anymore. So no '\0' terminator is 547 /// used. *str is advanced every time something has been decoded successfully. 548 static const char * 549 parse_options(const char **const str, const char *str_end, 550 void *filter_options, 551 const option_map *const optmap, const size_t optmap_size) 552 { 553 while (*str < str_end && **str != '\0') { 554 // Each option is of the form name=value. 555 // Commas (',') separate options. Extra commas are ignored. 556 // Ignoring extra commas makes it simpler if an optional 557 // option stored in a shell variable which can be empty. 558 if (**str == ',') { 559 ++*str; 560 continue; 561 } 562 563 // Find where the next name=value ends. 564 const size_t str_len = (size_t)(str_end - *str); 565 const char *name_eq_value_end = memchr(*str, ',', str_len); 566 if (name_eq_value_end == NULL) 567 name_eq_value_end = str_end; 568 569 const char *equals_sign = memchr(*str, '=', 570 (size_t)(name_eq_value_end - *str)); 571 572 // Fail if the '=' wasn't found or the option name is missing 573 // (the first char is '='). 574 if (equals_sign == NULL || **str == '=') 575 return "Options must be 'name=value' pairs separated " 576 "with commas"; 577 578 // Reject a too long option name so that the memcmp() 579 // in the loop below won't read past the end of the 580 // string in optmap[i].name. 
581 const size_t name_len = (size_t)(equals_sign - *str); 582 if (name_len > NAME_LEN_MAX) 583 return "Unknown option name"; 584 585 // Find the option name from optmap[]. 586 size_t i = 0; 587 while (true) { 588 if (i == optmap_size) 589 return "Unknown option name"; 590 591 if (memcmp(*str, optmap[i].name, name_len) == 0 592 && optmap[i].name[name_len] == '\0') 593 break; 594 595 ++i; 596 } 597 598 // The input string is good at least until the start of 599 // the option value. 600 *str = equals_sign + 1; 601 602 // The code assumes that the option value isn't an empty 603 // string so check it here. 604 const size_t value_len = (size_t)(name_eq_value_end - *str); 605 if (value_len == 0) 606 return "Option value cannot be empty"; 607 608 // LZMA1/2 preset has its own parsing function. 609 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) { 610 const char *errmsg = set_lzma12_preset(str, 611 name_eq_value_end, filter_options); 612 if (errmsg != NULL) 613 return errmsg; 614 615 continue; 616 } 617 618 // It's an integer value. 619 uint32_t v; 620 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { 621 // The integer is picked from a string-to-integer map. 622 // 623 // Reject a too long value string so that the memcmp() 624 // in the loop below won't read past the end of the 625 // string in optmap[i].u.map[j].name. 626 if (value_len > NAME_LEN_MAX) 627 return "Invalid option value"; 628 629 const name_value_map *map = optmap[i].u.map; 630 size_t j = 0; 631 while (true) { 632 // The array is terminated with an empty name. 633 if (map[j].name[0] == '\0') 634 return "Invalid option value"; 635 636 if (memcmp(*str, map[j].name, value_len) == 0 637 && map[j].name[value_len] 638 == '\0') { 639 v = map[j].value; 640 break; 641 } 642 643 ++j; 644 } 645 } else if (**str < '0' || **str > '9') { 646 // Note that "max" isn't supported while it is 647 // supported in xz. It's not useful here. 
648 return "Value is not a non-negative decimal integer"; 649 } else { 650 // strtoul() has locale-specific behavior so it cannot 651 // be relied on to get reproducible results since we 652 // cannot change the locate in a thread-safe library. 653 // It also needs '\0'-termination. 654 // 655 // Use a temporary pointer so that *str will point 656 // to the beginning of the value string in case 657 // an error occurs. 658 const char *p = *str; 659 v = 0; 660 do { 661 if (v > UINT32_MAX / 10) 662 return "Value out of range"; 663 664 v *= 10; 665 666 const uint32_t add = (uint32_t)(*p - '0'); 667 if (UINT32_MAX - add < v) 668 return "Value out of range"; 669 670 v += add; 671 ++p; 672 } while (p < name_eq_value_end 673 && *p >= '0' && *p <= '9'); 674 675 if (p < name_eq_value_end) { 676 // Remember this position so that it can be 677 // used for error messages that are 678 // specifically about the suffix. (Out of 679 // range values are about the whole value 680 // and those error messages point to the 681 // beginning of the number part, 682 // not to the suffix.) 683 const char *multiplier_start = p; 684 685 // If multiplier suffix shouldn't be used 686 // then don't allow them even if the value 687 // would stay within limits. This is a somewhat 688 // unnecessary check but it rejects silly 689 // things like lzma2:pb=0MiB which xz allows. 
690 if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX) 691 == 0) { 692 *str = multiplier_start; 693 return "This option does not support " 694 "any integer suffixes"; 695 } 696 697 uint32_t shift; 698 699 switch (*p) { 700 case 'k': 701 case 'K': 702 shift = 10; 703 break; 704 705 case 'm': 706 case 'M': 707 shift = 20; 708 break; 709 710 case 'g': 711 case 'G': 712 shift = 30; 713 break; 714 715 default: 716 *str = multiplier_start; 717 return "Invalid multiplier suffix " 718 "(KiB, MiB, or GiB)"; 719 } 720 721 ++p; 722 723 // Allow "M", "Mi", "MB", "MiB" and the same 724 // for the other five characters from the 725 // switch-statement above. All are handled 726 // as base-2 (perhaps a mistake, perhaps not). 727 // Note that 'i' and 'B' are case sensitive. 728 if (p < name_eq_value_end && *p == 'i') 729 ++p; 730 731 if (p < name_eq_value_end && *p == 'B') 732 ++p; 733 734 // Now we must have no chars remaining. 735 if (p < name_eq_value_end) { 736 *str = multiplier_start; 737 return "Invalid multiplier suffix " 738 "(KiB, MiB, or GiB)"; 739 } 740 741 if (v > (UINT32_MAX >> shift)) 742 return "Value out of range"; 743 744 v <<= shift; 745 } 746 747 if (v < optmap[i].u.range.min 748 || v > optmap[i].u.range.max) 749 return "Value out of range"; 750 } 751 752 // Set the value in filter_options. Enums are handled 753 // specially since the underlying type isn't the same 754 // as uint32_t on all systems. 755 void *ptr = (char *)filter_options + optmap[i].offset; 756 switch (optmap[i].type) { 757 case OPTMAP_TYPE_LZMA_MODE: 758 *(lzma_mode *)ptr = (lzma_mode)v; 759 break; 760 761 case OPTMAP_TYPE_LZMA_MATCH_FINDER: 762 *(lzma_match_finder *)ptr = (lzma_match_finder)v; 763 break; 764 765 default: 766 *(uint32_t *)ptr = v; 767 break; 768 } 769 770 // This option has been successfully handled. 771 *str = name_eq_value_end; 772 } 773 774 // No errors. 
775 return NULL; 776 } 777 778 779 /// Finds the name of the filter at the beginning of the string and 780 /// calls filter_name_map[i].parse() to decode the filter-specific options. 781 /// The caller must have set str_end so that exactly one filter and its 782 /// options are present without any trailing characters. 783 static const char * 784 parse_filter(const char **const str, const char *str_end, lzma_filter *filter, 785 const lzma_allocator *allocator, bool only_xz) 786 { 787 // Search for a colon or equals sign that would separate the filter 788 // name from filter options. If neither is found, then the input 789 // string only contains a filter name and there are no options. 790 // 791 // First assume that a colon or equals sign won't be found: 792 const char *name_end = str_end; 793 const char *opts_start = str_end; 794 795 for (const char *p = *str; p < str_end; ++p) { 796 if (*p == ':' || *p == '=') { 797 name_end = p; 798 799 // Filter options (name1=value1,name2=value2,...) 800 // begin after the colon or equals sign. 801 opts_start = p + 1; 802 break; 803 } 804 } 805 806 // Reject a too long filter name so that the memcmp() 807 // in the loop below won't read past the end of the 808 // string in filter_name_map[i].name. 809 const size_t name_len = (size_t)(name_end - *str); 810 if (name_len > NAME_LEN_MAX) 811 return "Unknown filter name"; 812 813 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 814 if (memcmp(*str, filter_name_map[i].name, name_len) == 0 815 && filter_name_map[i].name[name_len] == '\0') { 816 if (only_xz && filter_name_map[i].id 817 >= LZMA_FILTER_RESERVED_START) 818 return "This filter cannot be used in " 819 "the .xz format"; 820 821 // Allocate the filter-specific options and 822 // initialize the memory with zeros. 
823 void *options = lzma_alloc_zero( 824 filter_name_map[i].opts_size, 825 allocator); 826 if (options == NULL) 827 return "Memory allocation failed"; 828 829 // Filter name was found so the input string is good 830 // at least this far. 831 *str = opts_start; 832 833 const char *errmsg = filter_name_map[i].parse( 834 str, str_end, options); 835 if (errmsg != NULL) { 836 lzma_free(options, allocator); 837 return errmsg; 838 } 839 840 // *filter is modified only when parsing is successful. 841 filter->id = filter_name_map[i].id; 842 filter->options = options; 843 return NULL; 844 } 845 } 846 847 return "Unknown filter name"; 848 } 849 850 851 /// Converts the string to a filter chain (array of lzma_filter structures). 852 /// 853 /// *str is advanced every time something has been decoded successfully. 854 /// This way the caller knows where in the string a possible error occurred. 855 static const char * 856 str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags, 857 const lzma_allocator *allocator) 858 { 859 const char *errmsg; 860 861 // Skip leading spaces. 862 while (**str == ' ') 863 ++*str; 864 865 if (**str == '\0') 866 return "Empty string is not allowed, " 867 "try \"6\" if a default value is needed"; 868 869 // Detect the type of the string. 870 // 871 // A string beginning with a digit or a string beginning with 872 // one dash and a digit are treated as presets. Trailing spaces 873 // will be ignored too (leading spaces were already ignored above). 874 // 875 // For example, "6", "7 ", "-9e", or " -3 " are treated as presets. 876 // Strings like "-" or "- " aren't preset. 877 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') 878 if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) { 879 if (**str == '-') 880 ++*str; 881 882 // Ignore trailing spaces. 883 const size_t str_len = strlen(*str); 884 const char *str_end = memchr(*str, ' ', str_len); 885 if (str_end != NULL) { 886 // There is at least one trailing space. 
Check that 887 // there are no chars other than spaces. 888 for (size_t i = 1; str_end[i] != '\0'; ++i) 889 if (str_end[i] != ' ') 890 return "Unsupported preset"; 891 } else { 892 // There are no trailing spaces. Use the whole string. 893 str_end = *str + str_len; 894 } 895 896 uint32_t preset; 897 errmsg = parse_lzma12_preset(str, str_end, &preset); 898 if (errmsg != NULL) 899 return errmsg; 900 901 lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator); 902 if (opts == NULL) 903 return "Memory allocation failed"; 904 905 if (lzma_lzma_preset(opts, preset)) { 906 lzma_free(opts, allocator); 907 return "Unsupported preset"; 908 } 909 910 filters[0].id = LZMA_FILTER_LZMA2; 911 filters[0].options = opts; 912 filters[1].id = LZMA_VLI_UNKNOWN; 913 filters[1].options = NULL; 914 915 return NULL; 916 } 917 918 // Not a preset so it must be a filter chain. 919 // 920 // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that 921 // can be used in .xz. 922 const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0; 923 924 // Use a temporary array so that we don't modify the caller-supplied 925 // one until we know that no errors occurred. 926 lzma_filter temp_filters[LZMA_FILTERS_MAX + 1]; 927 928 size_t i = 0; 929 do { 930 if (i == LZMA_FILTERS_MAX) { 931 errmsg = "The maximum number of filters is four"; 932 goto error; 933 } 934 935 // Skip "--" if present. 936 if ((*str)[0] == '-' && (*str)[1] == '-') 937 *str += 2; 938 939 // Locate the end of "filter:name1=value1,name2=value2", 940 // stopping at the first "--" or a single space. 941 const char *filter_end = *str; 942 while (filter_end[0] != '\0') { 943 if ((filter_end[0] == '-' && filter_end[1] == '-') 944 || filter_end[0] == ' ') 945 break; 946 947 ++filter_end; 948 } 949 950 // Inputs that have "--" at the end or "-- " in the middle 951 // will result in an empty filter name. 
952 if (filter_end == *str) { 953 errmsg = "Filter name is missing"; 954 goto error; 955 } 956 957 errmsg = parse_filter(str, filter_end, &temp_filters[i], 958 allocator, only_xz); 959 if (errmsg != NULL) 960 goto error; 961 962 // Skip trailing spaces. 963 while (**str == ' ') 964 ++*str; 965 966 ++i; 967 } while (**str != '\0'); 968 969 // Seems to be good, terminate the array so that 970 // basic validation can be done. 971 temp_filters[i].id = LZMA_VLI_UNKNOWN; 972 temp_filters[i].options = NULL; 973 974 // Do basic validation if the application didn't prohibit it. 975 if ((flags & LZMA_STR_NO_VALIDATION) == 0) { 976 size_t dummy; 977 const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy); 978 assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR); 979 if (ret != LZMA_OK) { 980 errmsg = "Invalid filter chain " 981 "('lzma2' missing at the end?)"; 982 goto error; 983 } 984 } 985 986 // All good. Copy the filters to the application supplied array. 987 memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter)); 988 return NULL; 989 990 error: 991 // Free the filter options that were successfully decoded. 992 while (i-- > 0) 993 lzma_free(temp_filters[i].options, allocator); 994 995 return errmsg; 996 } 997 998 999 extern LZMA_API(const char *) 1000 lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, 1001 uint32_t flags, const lzma_allocator *allocator) 1002 { 1003 if (str == NULL || filters == NULL) 1004 return "Unexpected NULL pointer argument(s) " 1005 "to lzma_str_to_filters()"; 1006 1007 // Validate the flags. 1008 const uint32_t supported_flags 1009 = LZMA_STR_ALL_FILTERS 1010 | LZMA_STR_NO_VALIDATION; 1011 1012 if (flags & ~supported_flags) 1013 return "Unsupported flags to lzma_str_to_filters()"; 1014 1015 const char *used = str; 1016 const char *errmsg = str_to_filters(&used, filters, flags, allocator); 1017 1018 if (error_pos != NULL) { 1019 const size_t n = (size_t)(used - str); 1020 *error_pos = n > INT_MAX ? 
INT_MAX : (int)n; 1021 } 1022 1023 return errmsg; 1024 } 1025 1026 1027 /// Converts options of one filter to a string. 1028 /// 1029 /// The caller must have already put the filter name in the destination 1030 /// string. Since it is possible that no options will be needed, the caller 1031 /// won't have put a delimiter character (':' or '=') in the string yet. 1032 /// We will add it if at least one option will be added to the string. 1033 static void 1034 strfy_filter(lzma_str *dest, const char *delimiter, 1035 const option_map *optmap, size_t optmap_count, 1036 const void *filter_options) 1037 { 1038 for (size_t i = 0; i < optmap_count; ++i) { 1039 // No attempt is made to reverse LZMA1/2 preset. 1040 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) 1041 continue; 1042 1043 // All options have integer values, some just are mapped 1044 // to a string with a name_value_map. LZMA1/2 preset 1045 // isn't reversed back to preset=PRESET form. 1046 uint32_t v; 1047 const void *ptr 1048 = (const char *)filter_options + optmap[i].offset; 1049 switch (optmap[i].type) { 1050 case OPTMAP_TYPE_LZMA_MODE: 1051 v = *(const lzma_mode *)ptr; 1052 break; 1053 1054 case OPTMAP_TYPE_LZMA_MATCH_FINDER: 1055 v = *(const lzma_match_finder *)ptr; 1056 break; 1057 1058 default: 1059 v = *(const uint32_t *)ptr; 1060 break; 1061 } 1062 1063 // Skip this if this option should be omitted from 1064 // the string when the value is zero. 1065 if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO)) 1066 continue; 1067 1068 // Before the first option we add whatever delimiter 1069 // the caller gave us. For later options a comma is used. 1070 str_append_str(dest, delimiter); 1071 delimiter = ","; 1072 1073 // Add the option name and equals sign. 
1074 str_append_str(dest, optmap[i].name); 1075 str_append_str(dest, "="); 1076 1077 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { 1078 const name_value_map *map = optmap[i].u.map; 1079 size_t j = 0; 1080 while (true) { 1081 if (map[j].name[0] == '\0') { 1082 str_append_str(dest, "UNKNOWN"); 1083 break; 1084 } 1085 1086 if (map[j].value == v) { 1087 str_append_str(dest, map[j].name); 1088 break; 1089 } 1090 1091 ++j; 1092 } 1093 } else { 1094 str_append_u32(dest, v, 1095 optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX); 1096 } 1097 } 1098 1099 return; 1100 } 1101 1102 1103 extern LZMA_API(lzma_ret) 1104 lzma_str_from_filters(char **output_str, const lzma_filter *filters, 1105 uint32_t flags, const lzma_allocator *allocator) 1106 { 1107 // On error *output_str is always set to NULL. 1108 // Do it as the very first step. 1109 if (output_str == NULL) 1110 return LZMA_PROG_ERROR; 1111 1112 *output_str = NULL; 1113 1114 if (filters == NULL) 1115 return LZMA_PROG_ERROR; 1116 1117 // Validate the flags. 1118 const uint32_t supported_flags 1119 = LZMA_STR_ENCODER 1120 | LZMA_STR_DECODER 1121 | LZMA_STR_GETOPT_LONG 1122 | LZMA_STR_NO_SPACES; 1123 1124 if (flags & ~supported_flags) 1125 return LZMA_OPTIONS_ERROR; 1126 1127 // There must be at least one filter. 1128 if (filters[0].id == LZMA_VLI_UNKNOWN) 1129 return LZMA_OPTIONS_ERROR; 1130 1131 // Allocate memory for the output string. 1132 lzma_str dest; 1133 return_if_error(str_init(&dest, allocator)); 1134 1135 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1136 1137 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1138 1139 for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { 1140 // If we reach LZMA_FILTERS_MAX, then the filters array 1141 // is too large since the ID cannot be LZMA_VLI_UNKNOWN here. 
1142 if (i == LZMA_FILTERS_MAX) { 1143 str_free(&dest, allocator); 1144 return LZMA_OPTIONS_ERROR; 1145 } 1146 1147 // Don't add a space between filters if the caller 1148 // doesn't want them. 1149 if (i > 0 && !(flags & LZMA_STR_NO_SPACES)) 1150 str_append_str(&dest, " "); 1151 1152 // Use dashes for xz getopt_long() compatible syntax but also 1153 // use dashes to separate filters when spaces weren't wanted. 1154 if ((flags & LZMA_STR_GETOPT_LONG) 1155 || (i > 0 && (flags & LZMA_STR_NO_SPACES))) 1156 str_append_str(&dest, "--"); 1157 1158 size_t j = 0; 1159 while (true) { 1160 if (j == ARRAY_SIZE(filter_name_map)) { 1161 // Filter ID in filters[i].id isn't supported. 1162 str_free(&dest, allocator); 1163 return LZMA_OPTIONS_ERROR; 1164 } 1165 1166 if (filter_name_map[j].id == filters[i].id) { 1167 // Add the filter name. 1168 str_append_str(&dest, filter_name_map[j].name); 1169 1170 // If only the filter names were wanted then 1171 // skip to the next filter. In this case 1172 // .options is ignored and may be NULL even 1173 // when the filter doesn't allow NULL options. 1174 if (!show_opts) 1175 break; 1176 1177 if (filters[i].options == NULL) { 1178 if (!filter_name_map[j].allow_null) { 1179 // Filter-specific options 1180 // are missing but with 1181 // this filter the options 1182 // structure is mandatory. 1183 str_free(&dest, allocator); 1184 return LZMA_OPTIONS_ERROR; 1185 } 1186 1187 // .options is allowed to be NULL. 1188 // There is no need to add any 1189 // options to the string. 1190 break; 1191 } 1192 1193 // Options structure is available. Add 1194 // the filter options to the string. 1195 const size_t optmap_count 1196 = (flags & LZMA_STR_ENCODER) 1197 ? 
filter_name_map[j].strfy_encoder 1198 : filter_name_map[j].strfy_decoder; 1199 strfy_filter(&dest, opt_delim, 1200 filter_name_map[j].optmap, 1201 optmap_count, 1202 filters[i].options); 1203 break; 1204 } 1205 1206 ++j; 1207 } 1208 } 1209 1210 return str_finish(output_str, &dest, allocator); 1211 } 1212 1213 1214 extern LZMA_API(lzma_ret) 1215 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, 1216 const lzma_allocator *allocator) 1217 { 1218 // On error *output_str is always set to NULL. 1219 // Do it as the very first step. 1220 if (output_str == NULL) 1221 return LZMA_PROG_ERROR; 1222 1223 *output_str = NULL; 1224 1225 // Validate the flags. 1226 const uint32_t supported_flags 1227 = LZMA_STR_ALL_FILTERS 1228 | LZMA_STR_ENCODER 1229 | LZMA_STR_DECODER 1230 | LZMA_STR_GETOPT_LONG; 1231 1232 if (flags & ~supported_flags) 1233 return LZMA_OPTIONS_ERROR; 1234 1235 // Allocate memory for the output string. 1236 lzma_str dest; 1237 return_if_error(str_init(&dest, allocator)); 1238 1239 // If only listing the filter names then separate them with spaces. 1240 // Otherwise use newlines. 1241 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1242 const char *filter_delim = show_opts ? "\n" : " "; 1243 1244 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1245 bool first_filter_printed = false; 1246 1247 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 1248 // If we are printing only one filter then skip others. 1249 if (filter_id != LZMA_VLI_UNKNOWN 1250 && filter_id != filter_name_map[i].id) 1251 continue; 1252 1253 // If we are printing only .xz filters then skip the others. 1254 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START 1255 && (flags & LZMA_STR_ALL_FILTERS) == 0 1256 && filter_id == LZMA_VLI_UNKNOWN) 1257 continue; 1258 1259 // Add a new line if this isn't the first filter being 1260 // written to the string. 
1261 if (first_filter_printed) 1262 str_append_str(&dest, filter_delim); 1263 1264 first_filter_printed = true; 1265 1266 if (flags & LZMA_STR_GETOPT_LONG) 1267 str_append_str(&dest, "--"); 1268 1269 str_append_str(&dest, filter_name_map[i].name); 1270 1271 // If only the filter names were wanted then continue 1272 // to the next filter. 1273 if (!show_opts) 1274 continue; 1275 1276 const option_map *optmap = filter_name_map[i].optmap; 1277 const char *d = opt_delim; 1278 1279 const size_t end = (flags & LZMA_STR_ENCODER) 1280 ? filter_name_map[i].strfy_encoder 1281 : filter_name_map[i].strfy_decoder; 1282 1283 for (size_t j = 0; j < end; ++j) { 1284 // The first option is delimited from the filter 1285 // name using "=" or ":" and the rest of the options 1286 // are separated with ",". 1287 str_append_str(&dest, d); 1288 d = ","; 1289 1290 // optname=<possible_values> 1291 str_append_str(&dest, optmap[j].name); 1292 str_append_str(&dest, "=<"); 1293 1294 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { 1295 // LZMA1/2 preset has its custom help string. 1296 str_append_str(&dest, LZMA12_PRESET_STR); 1297 } else if (optmap[j].flags 1298 & OPTMAP_USE_NAME_VALUE_MAP) { 1299 // Separate the possible option values by "|". 1300 const name_value_map *m = optmap[j].u.map; 1301 for (size_t k = 0; m[k].name[0] != '\0'; ++k) { 1302 if (k > 0) 1303 str_append_str(&dest, "|"); 1304 1305 str_append_str(&dest, m[k].name); 1306 } 1307 } else { 1308 // Integer range is shown as min-max. 1309 const bool use_byte_suffix = optmap[j].flags 1310 & OPTMAP_USE_BYTE_SUFFIX; 1311 str_append_u32(&dest, optmap[j].u.range.min, 1312 use_byte_suffix); 1313 str_append_str(&dest, "-"); 1314 str_append_u32(&dest, optmap[j].u.range.max, 1315 use_byte_suffix); 1316 } 1317 1318 str_append_str(&dest, ">"); 1319 } 1320 } 1321 1322 // If no filters were added to the string then it must be because 1323 // the caller provided an unsupported Filter ID. 
1324 if (!first_filter_printed) { 1325 str_free(&dest, allocator); 1326 return LZMA_OPTIONS_ERROR; 1327 } 1328 1329 return str_finish(output_str, &dest, allocator); 1330 } 1331