1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file string_conversion.c 4 /// \brief Conversion of strings to filter chain and vice versa 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "filter_common.h" 14 15 16 ///////////////////// 17 // String building // 18 ///////////////////// 19 20 /// How much memory to allocate for strings. For now, no realloc is used 21 /// so this needs to be big enough even though there of course is 22 /// an overflow check still. 23 /// 24 /// FIXME? Using a fixed size is wasteful if the application doesn't free 25 /// the string fairly quickly but this can be improved later if needed. 26 #define STR_ALLOC_SIZE 800 27 28 29 typedef struct { 30 char *buf; 31 size_t pos; 32 } lzma_str; 33 34 35 static lzma_ret 36 str_init(lzma_str *str, const lzma_allocator *allocator) 37 { 38 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); 39 if (str->buf == NULL) 40 return LZMA_MEM_ERROR; 41 42 str->pos = 0; 43 return LZMA_OK; 44 } 45 46 47 static void 48 str_free(lzma_str *str, const lzma_allocator *allocator) 49 { 50 lzma_free(str->buf, allocator); 51 return; 52 } 53 54 55 static bool 56 str_is_full(const lzma_str *str) 57 { 58 return str->pos == STR_ALLOC_SIZE - 1; 59 } 60 61 62 static lzma_ret 63 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) 64 { 65 if (str_is_full(str)) { 66 // The preallocated buffer was too small. 67 // This shouldn't happen as STR_ALLOC_SIZE should 68 // be adjusted if new filters are added. 69 lzma_free(str->buf, allocator); 70 *dest = NULL; 71 assert(0); 72 return LZMA_PROG_ERROR; 73 } 74 75 str->buf[str->pos] = '\0'; 76 *dest = str->buf; 77 return LZMA_OK; 78 } 79 80 81 static void 82 str_append_str(lzma_str *str, const char *s) 83 { 84 const size_t len = strlen(s); 85 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; 86 const size_t copy_size = my_min(len, limit); 87 88 memcpy(str->buf + str->pos, s, copy_size); 89 str->pos += copy_size; 90 return; 91 } 92 93 94 static void 95 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) 96 { 97 if (v == 0) { 98 str_append_str(str, "0"); 99 } else { 100 // NOTE: Don't use plain "B" because xz and the parser in this 101 // file don't support it and at glance it may look like 8 102 // (there cannot be a space before the suffix). 103 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; 104 105 size_t suf = 0; 106 if (use_byte_suffix) { 107 while ((v & 1023) == 0 108 && suf < ARRAY_SIZE(suffixes) - 1) { 109 v >>= 10; 110 ++suf; 111 } 112 } 113 114 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember 115 // that initializing to "" initializes all elements to 116 // zero so '\0'-termination gets handled by this. 117 char buf[16] = ""; 118 size_t pos = sizeof(buf) - 1; 119 120 do { 121 buf[--pos] = '0' + (v % 10); 122 v /= 10; 123 } while (v != 0); 124 125 str_append_str(str, buf + pos); 126 str_append_str(str, suffixes[suf]); 127 } 128 129 return; 130 } 131 132 133 ////////////////////////////////////////////// 134 // Parsing and stringification declarations // 135 ////////////////////////////////////////////// 136 137 /// Maximum length for filter and option names. 138 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes 139 #define NAME_LEN_MAX 11 140 141 142 /// For option_map.flags: Use .u.map to do convert the input value 143 /// to an integer. Without this flag, .u.range.{min,max} are used 144 /// as the allowed range for the integer. 145 #define OPTMAP_USE_NAME_VALUE_MAP 0x01 146 147 /// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in 148 /// the stringified output if the value is an exact multiple of these. 149 /// This is used e.g. for LZMA1/2 dictionary size. 150 #define OPTMAP_USE_BYTE_SUFFIX 0x02 151 152 /// For option_map.flags: If the integer value is zero then this option 153 /// won't be included in the stringified output. It's used e.g. for 154 /// BCJ filter start offset which usually is zero. 155 #define OPTMAP_NO_STRFY_ZERO 0x04 156 157 /// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0, 158 /// it doesn't need to be specified in the initializers as it is 159 /// the implicit value. 160 enum { 161 OPTMAP_TYPE_UINT32, 162 OPTMAP_TYPE_LZMA_MODE, 163 OPTMAP_TYPE_LZMA_MATCH_FINDER, 164 OPTMAP_TYPE_LZMA_PRESET, 165 }; 166 167 168 /// This is for mapping string values in options to integers. 169 /// The last element of an array must have "" as the name. 170 /// It's used e.g. for match finder names in LZMA1/2. 171 typedef struct { 172 const char name[NAME_LEN_MAX + 1]; 173 const uint32_t value; 174 } name_value_map; 175 176 177 /// Each filter that has options needs an array of option_map structures. 178 /// The array doesn't need to be terminated as the functions take the 179 /// length of the array as an argument. 180 /// 181 /// When converting a string to filter options structure, option values 182 /// will be handled in a few different ways: 183 /// 184 /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string 185 /// is handled specially. 186 /// 187 /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is 188 /// converted to an integer using the name_value_map pointed by .u.map. 189 /// The last element in .u.map must have .name = "" as the terminator. 190 /// 191 /// (3) Otherwise the string is treated as a non-negative unsigned decimal 192 /// integer which must be in the range set in .u.range. If .flags has 193 /// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed. 194 /// 195 /// The integer value from (2) or (3) is then stored to filter_options 196 /// at the offset specified in .offset using the type specified in .type 197 /// (default is uint32_t). 198 /// 199 /// Stringifying a filter is done by processing a given number of options 200 /// in order from the beginning of an option_map array. The integer is 201 /// read from filter_options at .offset using the type from .type. 202 /// 203 /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the 204 /// option is skipped. 205 /// 206 /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used 207 /// to convert the option to a string. If the map doesn't contain a string 208 /// for the integer value then "UNKNOWN" is used. 209 /// 210 /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is 211 /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB, 212 /// MiB, or GiB suffix is used if the value is an exact multiple of these. 213 /// Plain "B" suffix is never used. 214 typedef struct { 215 char name[NAME_LEN_MAX + 1]; 216 uint8_t type; 217 uint8_t flags; 218 uint16_t offset; 219 220 union { 221 struct { 222 uint32_t min; 223 uint32_t max; 224 } range; 225 226 const name_value_map *map; 227 } u; 228 } option_map; 229 230 231 static const char *parse_options(const char **const str, const char *str_end, 232 void *filter_options, 233 const option_map *const optmap, const size_t optmap_size); 234 235 236 ///////// 237 // BCJ // 238 ///////// 239 240 #if defined(HAVE_ENCODER_X86) \ 241 || defined(HAVE_DECODER_X86) \ 242 || defined(HAVE_ENCODER_ARM) \ 243 || defined(HAVE_DECODER_ARM) \ 244 || defined(HAVE_ENCODER_ARMTHUMB) \ 245 || defined(HAVE_DECODER_ARMTHUMB) \ 246 || defined(HAVE_ENCODER_ARM64) \ 247 || defined(HAVE_DECODER_ARM64) \ 248 || defined(HAVE_ENCODER_POWERPC) \ 249 || defined(HAVE_DECODER_POWERPC) \ 250 || defined(HAVE_ENCODER_IA64) \ 251 || defined(HAVE_DECODER_IA64) \ 252 || defined(HAVE_ENCODER_SPARC) \ 253 || defined(HAVE_DECODER_SPARC) 254 static const option_map bcj_optmap[] = { 255 { 256 .name = "start", 257 .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX, 258 .offset = offsetof(lzma_options_bcj, start_offset), 259 .u.range.min = 0, 260 .u.range.max = UINT32_MAX, 261 } 262 }; 263 264 265 static const char * 266 parse_bcj(const char **const str, const char *str_end, void *filter_options) 267 { 268 // filter_options was zeroed on allocation and that is enough 269 // for the default value. 270 return parse_options(str, str_end, filter_options, 271 bcj_optmap, ARRAY_SIZE(bcj_optmap)); 272 } 273 #endif 274 275 276 /////////// 277 // Delta // 278 /////////// 279 280 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) 281 static const option_map delta_optmap[] = { 282 { 283 .name = "dist", 284 .offset = offsetof(lzma_options_delta, dist), 285 .u.range.min = LZMA_DELTA_DIST_MIN, 286 .u.range.max = LZMA_DELTA_DIST_MAX, 287 } 288 }; 289 290 291 static const char * 292 parse_delta(const char **const str, const char *str_end, void *filter_options) 293 { 294 lzma_options_delta *opts = filter_options; 295 opts->type = LZMA_DELTA_TYPE_BYTE; 296 opts->dist = LZMA_DELTA_DIST_MIN; 297 298 return parse_options(str, str_end, filter_options, 299 delta_optmap, ARRAY_SIZE(delta_optmap)); 300 } 301 #endif 302 303 304 /////////////////// 305 // LZMA1 & LZMA2 // 306 /////////////////// 307 308 /// Help string for presets 309 #define LZMA12_PRESET_STR "0-9[e]" 310 311 312 static const char * 313 parse_lzma12_preset(const char **const str, const char *str_end, 314 uint32_t *preset) 315 { 316 assert(*str < str_end); 317 *preset = (uint32_t)(**str - '0'); 318 319 // NOTE: Remember to update LZMA12_PRESET_STR if this is modified! 320 while (++*str < str_end) { 321 switch (**str) { 322 case 'e': 323 *preset |= LZMA_PRESET_EXTREME; 324 break; 325 326 default: 327 return "Unsupported preset flag"; 328 } 329 } 330 331 return NULL; 332 } 333 334 335 static const char * 336 set_lzma12_preset(const char **const str, const char *str_end, 337 void *filter_options) 338 { 339 uint32_t preset; 340 const char *errmsg = parse_lzma12_preset(str, str_end, &preset); 341 if (errmsg != NULL) 342 return errmsg; 343 344 lzma_options_lzma *opts = filter_options; 345 if (lzma_lzma_preset(opts, preset)) 346 return "Unsupported preset"; 347 348 return NULL; 349 } 350 351 352 static const name_value_map lzma12_mode_map[] = { 353 { "fast", LZMA_MODE_FAST }, 354 { "normal", LZMA_MODE_NORMAL }, 355 { "", 0 } 356 }; 357 358 359 static const name_value_map lzma12_mf_map[] = { 360 { "hc3", LZMA_MF_HC3 }, 361 { "hc4", LZMA_MF_HC4 }, 362 { "bt2", LZMA_MF_BT2 }, 363 { "bt3", LZMA_MF_BT3 }, 364 { "bt4", LZMA_MF_BT4 }, 365 { "", 0 } 366 }; 367 368 369 static const option_map lzma12_optmap[] = { 370 { 371 .name = "preset", 372 .type = OPTMAP_TYPE_LZMA_PRESET, 373 }, { 374 .name = "dict", 375 .flags = OPTMAP_USE_BYTE_SUFFIX, 376 .offset = offsetof(lzma_options_lzma, dict_size), 377 .u.range.min = LZMA_DICT_SIZE_MIN, 378 // FIXME? The max is really max for encoding but decoding 379 // would allow 4 GiB - 1 B. 380 .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29), 381 }, { 382 .name = "lc", 383 .offset = offsetof(lzma_options_lzma, lc), 384 .u.range.min = LZMA_LCLP_MIN, 385 .u.range.max = LZMA_LCLP_MAX, 386 }, { 387 .name = "lp", 388 .offset = offsetof(lzma_options_lzma, lp), 389 .u.range.min = LZMA_LCLP_MIN, 390 .u.range.max = LZMA_LCLP_MAX, 391 }, { 392 .name = "pb", 393 .offset = offsetof(lzma_options_lzma, pb), 394 .u.range.min = LZMA_PB_MIN, 395 .u.range.max = LZMA_PB_MAX, 396 }, { 397 .name = "mode", 398 .type = OPTMAP_TYPE_LZMA_MODE, 399 .flags = OPTMAP_USE_NAME_VALUE_MAP, 400 .offset = offsetof(lzma_options_lzma, mode), 401 .u.map = lzma12_mode_map, 402 }, { 403 .name = "nice", 404 .offset = offsetof(lzma_options_lzma, nice_len), 405 .u.range.min = 2, 406 .u.range.max = 273, 407 }, { 408 .name = "mf", 409 .type = OPTMAP_TYPE_LZMA_MATCH_FINDER, 410 .flags = OPTMAP_USE_NAME_VALUE_MAP, 411 .offset = offsetof(lzma_options_lzma, mf), 412 .u.map = lzma12_mf_map, 413 }, { 414 .name = "depth", 415 .offset = offsetof(lzma_options_lzma, depth), 416 .u.range.min = 0, 417 .u.range.max = UINT32_MAX, 418 } 419 }; 420 421 422 static const char * 423 parse_lzma12(const char **const str, const char *str_end, void *filter_options) 424 { 425 lzma_options_lzma *opts = filter_options; 426 427 // It cannot fail. 428 const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT); 429 assert(!preset_ret); 430 (void)preset_ret; 431 432 const char *errmsg = parse_options(str, str_end, filter_options, 433 lzma12_optmap, ARRAY_SIZE(lzma12_optmap)); 434 if (errmsg != NULL) 435 return errmsg; 436 437 if (opts->lc + opts->lp > LZMA_LCLP_MAX) 438 return "The sum of lc and lp must not exceed 4"; 439 440 return NULL; 441 } 442 443 444 ///////////////////////////////////////// 445 // Generic parsing and stringification // 446 ///////////////////////////////////////// 447 448 static const struct { 449 /// Name of the filter 450 char name[NAME_LEN_MAX + 1]; 451 452 /// For lzma_str_to_filters: 453 /// Size of the filter-specific options structure. 454 uint32_t opts_size; 455 456 /// Filter ID 457 lzma_vli id; 458 459 /// For lzma_str_to_filters: 460 /// Function to parse the filter-specific options. The filter_options 461 /// will already have been allocated using lzma_alloc_zero(). 462 const char *(*parse)(const char **str, const char *str_end, 463 void *filter_options); 464 465 /// For lzma_str_from_filters: 466 /// If the flag LZMA_STR_ENCODER is used then the first 467 /// strfy_encoder elements of optmap are stringified. 468 /// With LZMA_STR_DECODER strfy_decoder is used. 469 /// Currently encoders use all options that decoders do but if 470 /// that changes then this needs to be changed too, for example, 471 /// add a new OPTMAP flag to skip printing some decoder-only options. 472 const option_map *optmap; 473 uint8_t strfy_encoder; 474 uint8_t strfy_decoder; 475 476 /// For lzma_str_from_filters: 477 /// If true, lzma_filter.options is allowed to be NULL. In that case, 478 /// only the filter name is printed without any options. 479 bool allow_null; 480 481 } filter_name_map[] = { 482 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) 483 { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1, 484 &parse_lzma12, lzma12_optmap, 9, 5, false }, 485 #endif 486 487 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) 488 { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2, 489 &parse_lzma12, lzma12_optmap, 9, 2, false }, 490 #endif 491 492 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) 493 { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86, 494 &parse_bcj, bcj_optmap, 1, 1, true }, 495 #endif 496 497 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) 498 { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM, 499 &parse_bcj, bcj_optmap, 1, 1, true }, 500 #endif 501 502 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) 503 { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB, 504 &parse_bcj, bcj_optmap, 1, 1, true }, 505 #endif 506 507 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64) 508 { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64, 509 &parse_bcj, bcj_optmap, 1, 1, true }, 510 #endif 511 512 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) 513 { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC, 514 &parse_bcj, bcj_optmap, 1, 1, true }, 515 #endif 516 517 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) 518 { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64, 519 &parse_bcj, bcj_optmap, 1, 1, true }, 520 #endif 521 522 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) 523 { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC, 524 &parse_bcj, bcj_optmap, 1, 1, true }, 525 #endif 526 527 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) 528 { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA, 529 &parse_delta, delta_optmap, 1, 1, false }, 530 #endif 531 }; 532 533 534 /// Decodes options from a string for one filter (name1=value1,name2=value2). 535 /// Caller must have allocated memory for filter_options already and set 536 /// the initial default values. This is called from the filter-specific 537 /// parse_* functions. 538 /// 539 /// The input string starts at *str and the address in str_end is the first 540 /// char that is not part of the string anymore. So no '\0' terminator is 541 /// used. *str is advanced every time something has been decoded successfully. 542 static const char * 543 parse_options(const char **const str, const char *str_end, 544 void *filter_options, 545 const option_map *const optmap, const size_t optmap_size) 546 { 547 while (*str < str_end && **str != '\0') { 548 // Each option is of the form name=value. 549 // Commas (',') separate options. Extra commas are ignored. 550 // Ignoring extra commas makes it simpler if an optional 551 // option stored in a shell variable which can be empty. 552 if (**str == ',') { 553 ++*str; 554 continue; 555 } 556 557 // Find where the next name=value ends. 558 const size_t str_len = (size_t)(str_end - *str); 559 const char *name_eq_value_end = memchr(*str, ',', str_len); 560 if (name_eq_value_end == NULL) 561 name_eq_value_end = str_end; 562 563 const char *equals_sign = memchr(*str, '=', 564 (size_t)(name_eq_value_end - *str)); 565 566 // Fail if the '=' wasn't found or the option name is missing 567 // (the first char is '='). 568 if (equals_sign == NULL || **str == '=') 569 return "Options must be 'name=value' pairs separated " 570 "with commas"; 571 572 // Reject a too long option name so that the memcmp() 573 // in the loop below won't read past the end of the 574 // string in optmap[i].name. 575 const size_t name_len = (size_t)(equals_sign - *str); 576 if (name_len > NAME_LEN_MAX) 577 return "Unknown option name"; 578 579 // Find the option name from optmap[]. 580 size_t i = 0; 581 while (true) { 582 if (i == optmap_size) 583 return "Unknown option name"; 584 585 if (memcmp(*str, optmap[i].name, name_len) == 0 586 && optmap[i].name[name_len] == '\0') 587 break; 588 589 ++i; 590 } 591 592 // The input string is good at least until the start of 593 // the option value. 594 *str = equals_sign + 1; 595 596 // The code assumes that the option value isn't an empty 597 // string so check it here. 598 const size_t value_len = (size_t)(name_eq_value_end - *str); 599 if (value_len == 0) 600 return "Option value cannot be empty"; 601 602 // LZMA1/2 preset has its own parsing function. 603 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) { 604 const char *errmsg = set_lzma12_preset(str, 605 name_eq_value_end, filter_options); 606 if (errmsg != NULL) 607 return errmsg; 608 609 continue; 610 } 611 612 // It's an integer value. 613 uint32_t v; 614 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { 615 // The integer is picked from a string-to-integer map. 616 // 617 // Reject a too long value string so that the memcmp() 618 // in the loop below won't read past the end of the 619 // string in optmap[i].u.map[j].name. 620 if (value_len > NAME_LEN_MAX) 621 return "Invalid option value"; 622 623 const name_value_map *map = optmap[i].u.map; 624 size_t j = 0; 625 while (true) { 626 // The array is terminated with an empty name. 627 if (map[j].name[0] == '\0') 628 return "Invalid option value"; 629 630 if (memcmp(*str, map[j].name, value_len) == 0 631 && map[j].name[value_len] 632 == '\0') { 633 v = map[j].value; 634 break; 635 } 636 637 ++j; 638 } 639 } else if (**str < '0' || **str > '9') { 640 // Note that "max" isn't supported while it is 641 // supported in xz. It's not useful here. 642 return "Value is not a non-negative decimal integer"; 643 } else { 644 // strtoul() has locale-specific behavior so it cannot 645 // be relied on to get reproducible results since we 646 // cannot change the locate in a thread-safe library. 647 // It also needs '\0'-termination. 648 // 649 // Use a temporary pointer so that *str will point 650 // to the beginning of the value string in case 651 // an error occurs. 652 const char *p = *str; 653 v = 0; 654 do { 655 if (v > UINT32_MAX / 10) 656 return "Value out of range"; 657 658 v *= 10; 659 660 const uint32_t add = (uint32_t)(*p - '0'); 661 if (UINT32_MAX - add < v) 662 return "Value out of range"; 663 664 v += add; 665 ++p; 666 } while (p < name_eq_value_end 667 && *p >= '0' && *p <= '9'); 668 669 if (p < name_eq_value_end) { 670 // Remember this position so that it can be 671 // used for error messages that are 672 // specifically about the suffix. (Out of 673 // range values are about the whole value 674 // and those error messages point to the 675 // beginning of the number part, 676 // not to the suffix.) 677 const char *multiplier_start = p; 678 679 // If multiplier suffix shouldn't be used 680 // then don't allow them even if the value 681 // would stay within limits. This is a somewhat 682 // unnecessary check but it rejects silly 683 // things like lzma2:pb=0MiB which xz allows. 684 if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX) 685 == 0) { 686 *str = multiplier_start; 687 return "This option does not support " 688 "any integer suffixes"; 689 } 690 691 uint32_t shift; 692 693 switch (*p) { 694 case 'k': 695 case 'K': 696 shift = 10; 697 break; 698 699 case 'm': 700 case 'M': 701 shift = 20; 702 break; 703 704 case 'g': 705 case 'G': 706 shift = 30; 707 break; 708 709 default: 710 *str = multiplier_start; 711 return "Invalid multiplier suffix " 712 "(KiB, MiB, or GiB)"; 713 } 714 715 ++p; 716 717 // Allow "M", "Mi", "MB", "MiB" and the same 718 // for the other five characters from the 719 // switch-statement above. All are handled 720 // as base-2 (perhaps a mistake, perhaps not). 721 // Note that 'i' and 'B' are case sensitive. 722 if (p < name_eq_value_end && *p == 'i') 723 ++p; 724 725 if (p < name_eq_value_end && *p == 'B') 726 ++p; 727 728 // Now we must have no chars remaining. 729 if (p < name_eq_value_end) { 730 *str = multiplier_start; 731 return "Invalid multiplier suffix " 732 "(KiB, MiB, or GiB)"; 733 } 734 735 if (v > (UINT32_MAX >> shift)) 736 return "Value out of range"; 737 738 v <<= shift; 739 } 740 741 if (v < optmap[i].u.range.min 742 || v > optmap[i].u.range.max) 743 return "Value out of range"; 744 } 745 746 // Set the value in filter_options. Enums are handled 747 // specially since the underlying type isn't the same 748 // as uint32_t on all systems. 749 void *ptr = (char *)filter_options + optmap[i].offset; 750 switch (optmap[i].type) { 751 case OPTMAP_TYPE_LZMA_MODE: 752 *(lzma_mode *)ptr = (lzma_mode)v; 753 break; 754 755 case OPTMAP_TYPE_LZMA_MATCH_FINDER: 756 *(lzma_match_finder *)ptr = (lzma_match_finder)v; 757 break; 758 759 default: 760 *(uint32_t *)ptr = v; 761 break; 762 } 763 764 // This option has been successfully handled. 765 *str = name_eq_value_end; 766 } 767 768 // No errors. 769 return NULL; 770 } 771 772 773 /// Finds the name of the filter at the beginning of the string and 774 /// calls filter_name_map[i].parse() to decode the filter-specific options. 775 /// The caller must have set str_end so that exactly one filter and its 776 /// options are present without any trailing characters. 777 static const char * 778 parse_filter(const char **const str, const char *str_end, lzma_filter *filter, 779 const lzma_allocator *allocator, bool only_xz) 780 { 781 // Search for a colon or equals sign that would separate the filter 782 // name from filter options. If neither is found, then the input 783 // string only contains a filter name and there are no options. 784 // 785 // First assume that a colon or equals sign won't be found: 786 const char *name_end = str_end; 787 const char *opts_start = str_end; 788 789 for (const char *p = *str; p < str_end; ++p) { 790 if (*p == ':' || *p == '=') { 791 name_end = p; 792 793 // Filter options (name1=value1,name2=value2,...) 794 // begin after the colon or equals sign. 795 opts_start = p + 1; 796 break; 797 } 798 } 799 800 // Reject a too long filter name so that the memcmp() 801 // in the loop below won't read past the end of the 802 // string in filter_name_map[i].name. 803 const size_t name_len = (size_t)(name_end - *str); 804 if (name_len > NAME_LEN_MAX) 805 return "Unknown filter name"; 806 807 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 808 if (memcmp(*str, filter_name_map[i].name, name_len) == 0 809 && filter_name_map[i].name[name_len] == '\0') { 810 if (only_xz && filter_name_map[i].id 811 >= LZMA_FILTER_RESERVED_START) 812 return "This filter cannot be used in " 813 "the .xz format"; 814 815 // Allocate the filter-specific options and 816 // initialize the memory with zeros. 817 void *options = lzma_alloc_zero( 818 filter_name_map[i].opts_size, 819 allocator); 820 if (options == NULL) 821 return "Memory allocation failed"; 822 823 // Filter name was found so the input string is good 824 // at least this far. 825 *str = opts_start; 826 827 const char *errmsg = filter_name_map[i].parse( 828 str, str_end, options); 829 if (errmsg != NULL) { 830 lzma_free(options, allocator); 831 return errmsg; 832 } 833 834 // *filter is modified only when parsing is successful. 835 filter->id = filter_name_map[i].id; 836 filter->options = options; 837 return NULL; 838 } 839 } 840 841 return "Unknown filter name"; 842 } 843 844 845 /// Converts the string to a filter chain (array of lzma_filter structures). 846 /// 847 /// *str is advanced every time something has been decoded successfully. 848 /// This way the caller knows where in the string a possible error occurred. 849 static const char * 850 str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags, 851 const lzma_allocator *allocator) 852 { 853 const char *errmsg; 854 855 // Skip leading spaces. 856 while (**str == ' ') 857 ++*str; 858 859 if (**str == '\0') 860 return "Empty string is not allowed, " 861 "try \"6\" if a default value is needed"; 862 863 // Detect the type of the string. 864 // 865 // A string beginning with a digit or a string beginning with 866 // one dash and a digit are treated as presets. Trailing spaces 867 // will be ignored too (leading spaces were already ignored above). 868 // 869 // For example, "6", "7 ", "-9e", or " -3 " are treated as presets. 870 // Strings like "-" or "- " aren't preset. 871 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') 872 if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) { 873 if (**str == '-') 874 ++*str; 875 876 // Ignore trailing spaces. 877 const size_t str_len = strlen(*str); 878 const char *str_end = memchr(*str, ' ', str_len); 879 if (str_end != NULL) { 880 // There is at least one trailing space. Check that 881 // there are no chars other than spaces. 882 for (size_t i = 1; str_end[i] != '\0'; ++i) 883 if (str_end[i] != ' ') 884 return "Unsupported preset"; 885 } else { 886 // There are no trailing spaces. Use the whole string. 887 str_end = *str + str_len; 888 } 889 890 uint32_t preset; 891 errmsg = parse_lzma12_preset(str, str_end, &preset); 892 if (errmsg != NULL) 893 return errmsg; 894 895 lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator); 896 if (opts == NULL) 897 return "Memory allocation failed"; 898 899 if (lzma_lzma_preset(opts, preset)) { 900 lzma_free(opts, allocator); 901 return "Unsupported preset"; 902 } 903 904 filters[0].id = LZMA_FILTER_LZMA2; 905 filters[0].options = opts; 906 filters[1].id = LZMA_VLI_UNKNOWN; 907 filters[1].options = NULL; 908 909 return NULL; 910 } 911 912 // Not a preset so it must be a filter chain. 913 // 914 // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that 915 // can be used in .xz. 916 const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0; 917 918 // Use a temporary array so that we don't modify the caller-supplied 919 // one until we know that no errors occurred. 920 lzma_filter temp_filters[LZMA_FILTERS_MAX + 1]; 921 922 size_t i = 0; 923 do { 924 if (i == LZMA_FILTERS_MAX) { 925 errmsg = "The maximum number of filters is four"; 926 goto error; 927 } 928 929 // Skip "--" if present. 930 if ((*str)[0] == '-' && (*str)[1] == '-') 931 *str += 2; 932 933 // Locate the end of "filter:name1=value1,name2=value2", 934 // stopping at the first "--" or a single space. 935 const char *filter_end = *str; 936 while (filter_end[0] != '\0') { 937 if ((filter_end[0] == '-' && filter_end[1] == '-') 938 || filter_end[0] == ' ') 939 break; 940 941 ++filter_end; 942 } 943 944 // Inputs that have "--" at the end or "-- " in the middle 945 // will result in an empty filter name. 946 if (filter_end == *str) { 947 errmsg = "Filter name is missing"; 948 goto error; 949 } 950 951 errmsg = parse_filter(str, filter_end, &temp_filters[i], 952 allocator, only_xz); 953 if (errmsg != NULL) 954 goto error; 955 956 // Skip trailing spaces. 957 while (**str == ' ') 958 ++*str; 959 960 ++i; 961 } while (**str != '\0'); 962 963 // Seems to be good, terminate the array so that 964 // basic validation can be done. 965 temp_filters[i].id = LZMA_VLI_UNKNOWN; 966 temp_filters[i].options = NULL; 967 968 // Do basic validation if the application didn't prohibit it. 969 if ((flags & LZMA_STR_NO_VALIDATION) == 0) { 970 size_t dummy; 971 const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy); 972 assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR); 973 if (ret != LZMA_OK) { 974 errmsg = "Invalid filter chain " 975 "('lzma2' missing at the end?)"; 976 goto error; 977 } 978 } 979 980 // All good. Copy the filters to the application supplied array. 981 memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter)); 982 return NULL; 983 984 error: 985 // Free the filter options that were successfully decoded. 986 while (i-- > 0) 987 lzma_free(temp_filters[i].options, allocator); 988 989 return errmsg; 990 } 991 992 993 extern LZMA_API(const char *) 994 lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, 995 uint32_t flags, const lzma_allocator *allocator) 996 { 997 if (str == NULL || filters == NULL) 998 return "Unexpected NULL pointer argument(s) " 999 "to lzma_str_to_filters()"; 1000 1001 // Validate the flags. 1002 const uint32_t supported_flags 1003 = LZMA_STR_ALL_FILTERS 1004 | LZMA_STR_NO_VALIDATION; 1005 1006 if (flags & ~supported_flags) 1007 return "Unsupported flags to lzma_str_to_filters()"; 1008 1009 const char *used = str; 1010 const char *errmsg = str_to_filters(&used, filters, flags, allocator); 1011 1012 if (error_pos != NULL) { 1013 const size_t n = (size_t)(used - str); 1014 *error_pos = n > INT_MAX ? INT_MAX : (int)n; 1015 } 1016 1017 return errmsg; 1018 } 1019 1020 1021 /// Converts options of one filter to a string. 1022 /// 1023 /// The caller must have already put the filter name in the destination 1024 /// string. Since it is possible that no options will be needed, the caller 1025 /// won't have put a delimiter character (':' or '=') in the string yet. 1026 /// We will add it if at least one option will be added to the string. 1027 static void 1028 strfy_filter(lzma_str *dest, const char *delimiter, 1029 const option_map *optmap, size_t optmap_count, 1030 const void *filter_options) 1031 { 1032 for (size_t i = 0; i < optmap_count; ++i) { 1033 // No attempt is made to reverse LZMA1/2 preset. 1034 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) 1035 continue; 1036 1037 // All options have integer values, some just are mapped 1038 // to a string with a name_value_map. LZMA1/2 preset 1039 // isn't reversed back to preset=PRESET form. 1040 uint32_t v; 1041 const void *ptr 1042 = (const char *)filter_options + optmap[i].offset; 1043 switch (optmap[i].type) { 1044 case OPTMAP_TYPE_LZMA_MODE: 1045 v = *(const lzma_mode *)ptr; 1046 break; 1047 1048 case OPTMAP_TYPE_LZMA_MATCH_FINDER: 1049 v = *(const lzma_match_finder *)ptr; 1050 break; 1051 1052 default: 1053 v = *(const uint32_t *)ptr; 1054 break; 1055 } 1056 1057 // Skip this if this option should be omitted from 1058 // the string when the value is zero. 1059 if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO)) 1060 continue; 1061 1062 // Before the first option we add whatever delimiter 1063 // the caller gave us. For later options a comma is used. 1064 str_append_str(dest, delimiter); 1065 delimiter = ","; 1066 1067 // Add the option name and equals sign. 1068 str_append_str(dest, optmap[i].name); 1069 str_append_str(dest, "="); 1070 1071 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { 1072 const name_value_map *map = optmap[i].u.map; 1073 size_t j = 0; 1074 while (true) { 1075 if (map[j].name[0] == '\0') { 1076 str_append_str(dest, "UNKNOWN"); 1077 break; 1078 } 1079 1080 if (map[j].value == v) { 1081 str_append_str(dest, map[j].name); 1082 break; 1083 } 1084 1085 ++j; 1086 } 1087 } else { 1088 str_append_u32(dest, v, 1089 optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX); 1090 } 1091 } 1092 1093 return; 1094 } 1095 1096 1097 extern LZMA_API(lzma_ret) 1098 lzma_str_from_filters(char **output_str, const lzma_filter *filters, 1099 uint32_t flags, const lzma_allocator *allocator) 1100 { 1101 // On error *output_str is always set to NULL. 1102 // Do it as the very first step. 1103 if (output_str == NULL) 1104 return LZMA_PROG_ERROR; 1105 1106 *output_str = NULL; 1107 1108 if (filters == NULL) 1109 return LZMA_PROG_ERROR; 1110 1111 // Validate the flags. 1112 const uint32_t supported_flags 1113 = LZMA_STR_ENCODER 1114 | LZMA_STR_DECODER 1115 | LZMA_STR_GETOPT_LONG 1116 | LZMA_STR_NO_SPACES; 1117 1118 if (flags & ~supported_flags) 1119 return LZMA_OPTIONS_ERROR; 1120 1121 // There must be at least one filter. 1122 if (filters[0].id == LZMA_VLI_UNKNOWN) 1123 return LZMA_OPTIONS_ERROR; 1124 1125 // Allocate memory for the output string. 1126 lzma_str dest; 1127 return_if_error(str_init(&dest, allocator)); 1128 1129 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1130 1131 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1132 1133 for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { 1134 // If we reach LZMA_FILTERS_MAX, then the filters array 1135 // is too large since the ID cannot be LZMA_VLI_UNKNOWN here. 1136 if (i == LZMA_FILTERS_MAX) { 1137 str_free(&dest, allocator); 1138 return LZMA_OPTIONS_ERROR; 1139 } 1140 1141 // Don't add a space between filters if the caller 1142 // doesn't want them. 1143 if (i > 0 && !(flags & LZMA_STR_NO_SPACES)) 1144 str_append_str(&dest, " "); 1145 1146 // Use dashes for xz getopt_long() compatible syntax but also 1147 // use dashes to separate filters when spaces weren't wanted. 1148 if ((flags & LZMA_STR_GETOPT_LONG) 1149 || (i > 0 && (flags & LZMA_STR_NO_SPACES))) 1150 str_append_str(&dest, "--"); 1151 1152 size_t j = 0; 1153 while (true) { 1154 if (j == ARRAY_SIZE(filter_name_map)) { 1155 // Filter ID in filters[i].id isn't supported. 1156 str_free(&dest, allocator); 1157 return LZMA_OPTIONS_ERROR; 1158 } 1159 1160 if (filter_name_map[j].id == filters[i].id) { 1161 // Add the filter name. 1162 str_append_str(&dest, filter_name_map[j].name); 1163 1164 // If only the filter names were wanted then 1165 // skip to the next filter. In this case 1166 // .options is ignored and may be NULL even 1167 // when the filter doesn't allow NULL options. 1168 if (!show_opts) 1169 break; 1170 1171 if (filters[i].options == NULL) { 1172 if (!filter_name_map[j].allow_null) { 1173 // Filter-specific options 1174 // are missing but with 1175 // this filter the options 1176 // structure is mandatory. 1177 str_free(&dest, allocator); 1178 return LZMA_OPTIONS_ERROR; 1179 } 1180 1181 // .options is allowed to be NULL. 1182 // There is no need to add any 1183 // options to the string. 1184 break; 1185 } 1186 1187 // Options structure is available. Add 1188 // the filter options to the string. 1189 const size_t optmap_count 1190 = (flags & LZMA_STR_ENCODER) 1191 ? filter_name_map[j].strfy_encoder 1192 : filter_name_map[j].strfy_decoder; 1193 strfy_filter(&dest, opt_delim, 1194 filter_name_map[j].optmap, 1195 optmap_count, 1196 filters[i].options); 1197 break; 1198 } 1199 1200 ++j; 1201 } 1202 } 1203 1204 return str_finish(output_str, &dest, allocator); 1205 } 1206 1207 1208 extern LZMA_API(lzma_ret) 1209 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, 1210 const lzma_allocator *allocator) 1211 { 1212 // On error *output_str is always set to NULL. 1213 // Do it as the very first step. 1214 if (output_str == NULL) 1215 return LZMA_PROG_ERROR; 1216 1217 *output_str = NULL; 1218 1219 // Validate the flags. 1220 const uint32_t supported_flags 1221 = LZMA_STR_ALL_FILTERS 1222 | LZMA_STR_ENCODER 1223 | LZMA_STR_DECODER 1224 | LZMA_STR_GETOPT_LONG; 1225 1226 if (flags & ~supported_flags) 1227 return LZMA_OPTIONS_ERROR; 1228 1229 // Allocate memory for the output string. 1230 lzma_str dest; 1231 return_if_error(str_init(&dest, allocator)); 1232 1233 // If only listing the filter names then separate them with spaces. 1234 // Otherwise use newlines. 1235 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1236 const char *filter_delim = show_opts ? "\n" : " "; 1237 1238 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1239 bool first_filter_printed = false; 1240 1241 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 1242 // If we are printing only one filter then skip others. 1243 if (filter_id != LZMA_VLI_UNKNOWN 1244 && filter_id != filter_name_map[i].id) 1245 continue; 1246 1247 // If we are printing only .xz filters then skip the others. 1248 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START 1249 && (flags & LZMA_STR_ALL_FILTERS) == 0 1250 && filter_id == LZMA_VLI_UNKNOWN) 1251 continue; 1252 1253 // Add a new line if this isn't the first filter being 1254 // written to the string. 1255 if (first_filter_printed) 1256 str_append_str(&dest, filter_delim); 1257 1258 first_filter_printed = true; 1259 1260 if (flags & LZMA_STR_GETOPT_LONG) 1261 str_append_str(&dest, "--"); 1262 1263 str_append_str(&dest, filter_name_map[i].name); 1264 1265 // If only the filter names were wanted then continue 1266 // to the next filter. 1267 if (!show_opts) 1268 continue; 1269 1270 const option_map *optmap = filter_name_map[i].optmap; 1271 const char *d = opt_delim; 1272 1273 const size_t end = (flags & LZMA_STR_ENCODER) 1274 ? filter_name_map[i].strfy_encoder 1275 : filter_name_map[i].strfy_decoder; 1276 1277 for (size_t j = 0; j < end; ++j) { 1278 // The first option is delimited from the filter 1279 // name using "=" or ":" and the rest of the options 1280 // are separated with ",". 1281 str_append_str(&dest, d); 1282 d = ","; 1283 1284 // optname=<possible_values> 1285 str_append_str(&dest, optmap[j].name); 1286 str_append_str(&dest, "=<"); 1287 1288 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { 1289 // LZMA1/2 preset has its custom help string. 1290 str_append_str(&dest, LZMA12_PRESET_STR); 1291 } else if (optmap[j].flags 1292 & OPTMAP_USE_NAME_VALUE_MAP) { 1293 // Separate the possible option values by "|". 1294 const name_value_map *m = optmap[j].u.map; 1295 for (size_t k = 0; m[k].name[0] != '\0'; ++k) { 1296 if (k > 0) 1297 str_append_str(&dest, "|"); 1298 1299 str_append_str(&dest, m[k].name); 1300 } 1301 } else { 1302 // Integer range is shown as min-max. 1303 const bool use_byte_suffix = optmap[j].flags 1304 & OPTMAP_USE_BYTE_SUFFIX; 1305 str_append_u32(&dest, optmap[j].u.range.min, 1306 use_byte_suffix); 1307 str_append_str(&dest, "-"); 1308 str_append_u32(&dest, optmap[j].u.range.max, 1309 use_byte_suffix); 1310 } 1311 1312 str_append_str(&dest, ">"); 1313 } 1314 } 1315 1316 // If no filters were added to the string then it must be because 1317 // the caller provided an unsupported Filter ID. 1318 if (!first_filter_printed) { 1319 str_free(&dest, allocator); 1320 return LZMA_OPTIONS_ERROR; 1321 } 1322 1323 return str_finish(output_str, &dest, allocator); 1324 } 1325