1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file string_conversion.c 6 /// \brief Conversion of strings to filter chain and vice versa 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "filter_common.h" 13 14 15 // liblzma itself doesn't use gettext to translate messages. 16 // Mark the strings still so that xz can translate them. 17 #define N_(msgid) msgid 18 19 20 ///////////////////// 21 // String building // 22 ///////////////////// 23 24 /// How much memory to allocate for strings. For now, no realloc is used 25 /// so this needs to be big enough even though there of course is 26 /// an overflow check still. 27 /// 28 /// FIXME? Using a fixed size is wasteful if the application doesn't free 29 /// the string fairly quickly but this can be improved later if needed. 30 #define STR_ALLOC_SIZE 800 31 32 33 typedef struct { 34 char *buf; 35 size_t pos; 36 } lzma_str; 37 38 39 static lzma_ret 40 str_init(lzma_str *str, const lzma_allocator *allocator) 41 { 42 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); 43 if (str->buf == NULL) 44 return LZMA_MEM_ERROR; 45 46 str->pos = 0; 47 return LZMA_OK; 48 } 49 50 51 static void 52 str_free(lzma_str *str, const lzma_allocator *allocator) 53 { 54 lzma_free(str->buf, allocator); 55 return; 56 } 57 58 59 static bool 60 str_is_full(const lzma_str *str) 61 { 62 return str->pos == STR_ALLOC_SIZE - 1; 63 } 64 65 66 static lzma_ret 67 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) 68 { 69 if (str_is_full(str)) { 70 // The preallocated buffer was too small. 71 // This shouldn't happen as STR_ALLOC_SIZE should 72 // be adjusted if new filters are added. 
73 lzma_free(str->buf, allocator); 74 *dest = NULL; 75 assert(0); 76 return LZMA_PROG_ERROR; 77 } 78 79 str->buf[str->pos] = '\0'; 80 *dest = str->buf; 81 return LZMA_OK; 82 } 83 84 85 static void 86 str_append_str(lzma_str *str, const char *s) 87 { 88 const size_t len = strlen(s); 89 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; 90 const size_t copy_size = my_min(len, limit); 91 92 memcpy(str->buf + str->pos, s, copy_size); 93 str->pos += copy_size; 94 return; 95 } 96 97 98 static void 99 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) 100 { 101 if (v == 0) { 102 str_append_str(str, "0"); 103 } else { 104 // NOTE: Don't use plain "B" because xz and the parser in this 105 // file don't support it and at glance it may look like 8 106 // (there cannot be a space before the suffix). 107 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; 108 109 size_t suf = 0; 110 if (use_byte_suffix) { 111 while ((v & 1023) == 0 112 && suf < ARRAY_SIZE(suffixes) - 1) { 113 v >>= 10; 114 ++suf; 115 } 116 } 117 118 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember 119 // that initializing to "" initializes all elements to 120 // zero so '\0'-termination gets handled by this. 121 char buf[16] = ""; 122 size_t pos = sizeof(buf) - 1; 123 124 do { 125 buf[--pos] = '0' + (v % 10); 126 v /= 10; 127 } while (v != 0); 128 129 str_append_str(str, buf + pos); 130 str_append_str(str, suffixes[suf]); 131 } 132 133 return; 134 } 135 136 137 ////////////////////////////////////////////// 138 // Parsing and stringification declarations // 139 ////////////////////////////////////////////// 140 141 /// Maximum length for filter and option names. 142 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes 143 #define NAME_LEN_MAX 11 144 145 146 /// For option_map.flags: Use .u.map to do convert the input value 147 /// to an integer. Without this flag, .u.range.{min,max} are used 148 /// as the allowed range for the integer. 
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
/// the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04

/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
	OPTMAP_TYPE_UINT32,
	OPTMAP_TYPE_LZMA_MODE,
	OPTMAP_TYPE_LZMA_MATCH_FINDER,
	OPTMAP_TYPE_LZMA_PRESET,
};


/// This is for mapping string values in options to integers.
/// The last element of an array must have "" as the name.
/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	const char name[NAME_LEN_MAX + 1];
	const uint32_t value;
} name_value_map;


/// Each filter that has options needs an array of option_map structures.
/// The array doesn't need to be terminated as the functions take the
/// length of the array as an argument.
///
/// When converting a string to filter options structure, option values
/// will be handled in a few different ways:
///
/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
///     is handled specially.
///
/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
///     converted to an integer using the name_value_map pointed by .u.map.
///     The last element in .u.map must have .name = "" as the terminator.
///
/// (3) Otherwise the string is treated as a non-negative unsigned decimal
///     integer which must be in the range set in .u.range. If .flags has
///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
///
/// The integer value from (2) or (3) is then stored to filter_options
/// at the offset specified in .offset using the type specified in .type
/// (default is uint32_t).
///
/// Stringifying a filter is done by processing a given number of options
/// in order from the beginning of an option_map array. The integer is
/// read from filter_options at .offset using the type from .type.
///
/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
/// option is skipped.
///
/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
/// to convert the option to a string. If the map doesn't contain a string
/// for the integer value then "UNKNOWN" is used.
///
/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
/// MiB, or GiB suffix is used if the value is an exact multiple of these.
/// Plain "B" suffix is never used.
typedef struct {
	// Option name as it appears in the string ('\0'-terminated)
	char name[NAME_LEN_MAX + 1];

	// One of the OPTMAP_TYPE_* values
	uint8_t type;

	// Bitwise-or of the OPTMAP_* flags
	uint8_t flags;

	// Offset of the field in the filter-specific options structure
	uint16_t offset;

	union {
		// NVHPC has problems with unions that contain pointers that
		// are not the first members, so keep "map" at the top.
		const name_value_map *map;

		struct {
			uint32_t min;
			uint32_t max;
		} range;
	} u;
} option_map;


// Forward declaration: the filter-specific parse_* functions below call
// parse_options() which is defined after filter_name_map.
static const char *parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size);


/////////
// BCJ //
/////////

#if defined(HAVE_ENCODER_X86) \
		|| defined(HAVE_DECODER_X86) \
		|| defined(HAVE_ENCODER_ARM) \
		|| defined(HAVE_DECODER_ARM) \
		|| defined(HAVE_ENCODER_ARMTHUMB) \
		|| defined(HAVE_DECODER_ARMTHUMB) \
		|| defined(HAVE_ENCODER_ARM64) \
		|| defined(HAVE_DECODER_ARM64) \
		|| defined(HAVE_ENCODER_POWERPC) \
		|| defined(HAVE_DECODER_POWERPC) \
		|| defined(HAVE_ENCODER_IA64) \
		|| defined(HAVE_DECODER_IA64) \
		|| defined(HAVE_ENCODER_SPARC) \
		|| defined(HAVE_DECODER_SPARC) \
		|| defined(HAVE_ENCODER_RISCV) \
		|| defined(HAVE_DECODER_RISCV)
/// Options shared by all BCJ filters: only the start offset, which is
/// left out of the stringified output when it is zero.
static const option_map bcj_optmap[] = {
	{
		.name = "start",
		.flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
		.offset = offsetof(lzma_options_bcj, start_offset),
		.u.range.min = 0,
		.u.range.max = UINT32_MAX,
	}
};


/// Parses the options of a BCJ filter into lzma_options_bcj.
/// Returns NULL on success or an error message on failure.
static const char *
parse_bcj(const char **const str, const char *str_end, void *filter_options)
{
	// filter_options was zeroed on allocation and that is enough
	// for the default value.
	return parse_options(str, str_end, filter_options,
			bcj_optmap, ARRAY_SIZE(bcj_optmap));
}
#endif


///////////
// Delta //
///////////

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
/// Options of the Delta filter: only the distance.
static const option_map delta_optmap[] = {
	{
		.name = "dist",
		.offset = offsetof(lzma_options_delta, dist),
		.u.range.min = LZMA_DELTA_DIST_MIN,
		.u.range.max = LZMA_DELTA_DIST_MAX,
	}
};


/// Parses the options of the Delta filter into lzma_options_delta.
/// Returns NULL on success or an error message on failure.
static const char *
parse_delta(const char **const str, const char *str_end, void *filter_options)
{
	// Set the defaults that are used when the string doesn't
	// specify the option.
	lzma_options_delta *opts = filter_options;
	opts->type = LZMA_DELTA_TYPE_BYTE;
	opts->dist = LZMA_DELTA_DIST_MIN;

	return parse_options(str, str_end, filter_options,
			delta_optmap, ARRAY_SIZE(delta_optmap));
}
#endif


///////////////////
// LZMA1 & LZMA2 //
///////////////////

/// Help string for presets
#define LZMA12_PRESET_STR "0-9[e]"


/// Parses a preset string: one digit followed by zero or more 'e' flags.
/// The caller must ensure that the string isn't empty (*str < str_end).
///
/// On success the preset is stored to *preset, *str equals str_end, and
/// NULL is returned. On failure an error message is returned and *str
/// points to the char that wasn't accepted. Note that *preset may have
/// been modified even if an error is returned because the digit is
/// stored before the flags are checked.
static const char *
parse_lzma12_preset(const char **const str, const char *str_end,
		uint32_t *preset)
{
	assert(*str < str_end);

	if (!(**str >= '0' && **str <= '9'))
		return N_("Unsupported preset");

	*preset = (uint32_t)(**str - '0');

	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
	while (++*str < str_end) {
		switch (**str) {
		case 'e':
			*preset |= LZMA_PRESET_EXTREME;
			break;

		default:
			return N_("Unsupported flag in the preset");
		}
	}

	return NULL;
}


/// Parses a preset string and initializes the lzma_options_lzma pointed
/// by filter_options from that preset using lzma_lzma_preset().
/// Returns NULL on success or an error message on failure.
static const char *
set_lzma12_preset(const char **const str, const char *str_end,
		void *filter_options)
{
	uint32_t preset;
	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
	if (errmsg != NULL)
		return errmsg;

	lzma_options_lzma *opts = filter_options;
	if (lzma_lzma_preset(opts, preset))
		return N_("Unsupported preset");

	return NULL;
}


/// Names of the LZMA1/2 compression modes
static const name_value_map lzma12_mode_map[] = {
	{ "fast",   LZMA_MODE_FAST },
	{ "normal", LZMA_MODE_NORMAL },
	{ "",       0 }
};


/// Names of the LZMA1/2 match finders
static const name_value_map lzma12_mf_map[] = {
	{ "hc3",    LZMA_MF_HC3 },
	{ "hc4",    LZMA_MF_HC4 },
	{ "bt2",    LZMA_MF_BT2 },
	{ "bt3",    LZMA_MF_BT3 },
	{ "bt4",    LZMA_MF_BT4 },
	{ "",       0 }
};


/// Options of LZMA1 and LZMA2. The order matters for stringification:
/// filter_name_map[] specifies how many elements from the beginning
/// of this array are used.
static const option_map lzma12_optmap[] = {
	{
		.name = "preset",
		.type = OPTMAP_TYPE_LZMA_PRESET,
	}, {
		.name = "dict",
		.flags = OPTMAP_USE_BYTE_SUFFIX,
		.offset = offsetof(lzma_options_lzma, dict_size),
		.u.range.min = LZMA_DICT_SIZE_MIN,
		// FIXME? The max is really max for encoding but decoding
		// would allow 4 GiB - 1 B.
		.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
	}, {
		.name = "lc",
		.offset = offsetof(lzma_options_lzma, lc),
		.u.range.min = LZMA_LCLP_MIN,
		.u.range.max = LZMA_LCLP_MAX,
	}, {
		.name = "lp",
		.offset = offsetof(lzma_options_lzma, lp),
		.u.range.min = LZMA_LCLP_MIN,
		.u.range.max = LZMA_LCLP_MAX,
	}, {
		.name = "pb",
		.offset = offsetof(lzma_options_lzma, pb),
		.u.range.min = LZMA_PB_MIN,
		.u.range.max = LZMA_PB_MAX,
	}, {
		.name = "mode",
		.type = OPTMAP_TYPE_LZMA_MODE,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mode),
		.u.map = lzma12_mode_map,
	}, {
		.name = "nice",
		.offset = offsetof(lzma_options_lzma, nice_len),
		.u.range.min = 2,
		.u.range.max = 273,
	}, {
		.name = "mf",
		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mf),
		.u.map = lzma12_mf_map,
	}, {
		.name = "depth",
		.offset = offsetof(lzma_options_lzma, depth),
		.u.range.min = 0,
		.u.range.max = UINT32_MAX,
	}
};


/// Parses the options of LZMA1 or LZMA2 into lzma_options_lzma.
/// The options are first initialized from LZMA_PRESET_DEFAULT and
/// then modified according to the string.
/// Returns NULL on success or an error message on failure.
static const char *
parse_lzma12(const char **const str, const char *str_end, void *filter_options)
{
	lzma_options_lzma *opts = filter_options;

	// It cannot fail.
	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
	assert(!preset_ret);
	(void)preset_ret;

	const char *errmsg = parse_options(str, str_end, filter_options,
			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
	if (errmsg != NULL)
		return errmsg;

	// lc and lp were range-checked individually by parse_options()
	// but their sum is limited too.
	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
		return N_("The sum of lc and lp must not exceed 4");

	return NULL;
}


/////////////////////////////////////////
// Generic parsing and stringification //
/////////////////////////////////////////

static const struct {
	/// Name of the filter
	char name[NAME_LEN_MAX + 1];

	/// For lzma_str_to_filters:
	/// Size of the filter-specific options structure.
	uint32_t opts_size;

	/// Filter ID
	lzma_vli id;

	/// For lzma_str_to_filters:
	/// Function to parse the filter-specific options. The filter_options
	/// will already have been allocated using lzma_alloc_zero().
	const char *(*parse)(const char **str, const char *str_end,
			void *filter_options);

	/// For lzma_str_from_filters:
	/// If the flag LZMA_STR_ENCODER is used then the first
	/// strfy_encoder elements of optmap are stringified.
	/// With LZMA_STR_DECODER strfy_decoder is used.
	/// Currently encoders use all options that decoders do but if
	/// that changes then this needs to be changed too, for example,
	/// add a new OPTMAP flag to skip printing some decoder-only options.
	const option_map *optmap;
	uint8_t strfy_encoder;
	uint8_t strfy_decoder;

	/// For lzma_str_from_filters:
	/// If true, lzma_filter.options is allowed to be NULL. In that case,
	/// only the filter name is printed without any options.
	bool allow_null;

} filter_name_map[] = {
	// In lzma12_optmap[] the first five elements are preset, dict,
	// lc, lp, and pb, and the first two are preset and dict. Those
	// are the counts used as strfy_decoder for LZMA1 and LZMA2.
#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
	{ "lzma1",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA1,
	  &parse_lzma12,  lzma12_optmap, 9, 5, false },
#endif

#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
	{ "lzma2",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA2,
	  &parse_lzma12,  lzma12_optmap, 9, 2, false },
#endif

#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
	{ "x86",          sizeof(lzma_options_bcj),   LZMA_FILTER_X86,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
	{ "arm",          sizeof(lzma_options_bcj),   LZMA_FILTER_ARM,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
	{ "armthumb",     sizeof(lzma_options_bcj),   LZMA_FILTER_ARMTHUMB,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
	{ "arm64",        sizeof(lzma_options_bcj),   LZMA_FILTER_ARM64,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
	{ "riscv",        sizeof(lzma_options_bcj),   LZMA_FILTER_RISCV,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
	{ "powerpc",      sizeof(lzma_options_bcj),   LZMA_FILTER_POWERPC,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
	{ "ia64",         sizeof(lzma_options_bcj),   LZMA_FILTER_IA64,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
	{ "sparc",        sizeof(lzma_options_bcj),   LZMA_FILTER_SPARC,
	  &parse_bcj,     bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
	{ "delta",        sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
	  &parse_delta,   delta_optmap, 1, 1, false },
#endif
};


/// Decodes options from a string for one filter (name1=value1,name2=value2).
/// Caller must have allocated memory for filter_options already and set
/// the initial default values. This is called from the filter-specific
/// parse_* functions.
///
/// The input string starts at *str and the address in str_end is the first
/// char that is not part of the string anymore. So no '\0' terminator is
/// used. *str is advanced every time something has been decoded successfully.
///
/// \return     NULL on success. On failure an error message is returned
///             and *str points to the place in the input that the message
///             is about.
static const char *
parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size)
{
	while (*str < str_end && **str != '\0') {
		// Each option is of the form name=value.
		// Commas (',') separate options. Extra commas are ignored.
		// Ignoring extra commas makes it simpler if an optional
		// option is stored in a shell variable which can be empty.
		if (**str == ',') {
			++*str;
			continue;
		}

		// Find where the next name=value ends.
		const size_t str_len = (size_t)(str_end - *str);
		const char *name_eq_value_end = memchr(*str, ',', str_len);
		if (name_eq_value_end == NULL)
			name_eq_value_end = str_end;

		const char *equals_sign = memchr(*str, '=',
				(size_t)(name_eq_value_end - *str));

		// Fail if the '=' wasn't found or the option name is missing
		// (the first char is '=').
		if (equals_sign == NULL || **str == '=')
			return N_("Options must be 'name=value' pairs "
					"separated with commas");

		// Reject a too long option name so that the memcmp()
		// in the loop below won't read past the end of the
		// string in optmap[i].name.
		const size_t name_len = (size_t)(equals_sign - *str);
		if (name_len > NAME_LEN_MAX)
			return N_("Unknown option name");

		// Find the option name from optmap[].
		size_t i = 0;
		while (true) {
			if (i == optmap_size)
				return N_("Unknown option name");

			// The input isn't '\0'-terminated so compare
			// name_len bytes and then check that the name
			// in optmap ends there too.
			if (memcmp(*str, optmap[i].name, name_len) == 0
					&& optmap[i].name[name_len] == '\0')
				break;

			++i;
		}

		// The input string is good at least until the start of
		// the option value.
		*str = equals_sign + 1;

		// The code assumes that the option value isn't an empty
		// string so check it here.
		const size_t value_len = (size_t)(name_eq_value_end - *str);
		if (value_len == 0)
			return N_("Option value cannot be empty");

		// LZMA1/2 preset has its own parsing function.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
			const char *errmsg = set_lzma12_preset(str,
					name_eq_value_end, filter_options);
			if (errmsg != NULL)
				return errmsg;

			// On success parse_lzma12_preset() has advanced
			// *str to name_eq_value_end so there is no need
			// to set *str here.
			continue;
		}

		// It's an integer value.
		uint32_t v;
		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// The integer is picked from a string-to-integer map.
			//
			// Reject a too long value string so that the memcmp()
			// in the loop below won't read past the end of the
			// string in optmap[i].u.map[j].name.
			if (value_len > NAME_LEN_MAX)
				return N_("Invalid option value");

			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0')
					return N_("Invalid option value");

				if (memcmp(*str, map[j].name, value_len) == 0
						&& map[j].name[value_len]
							== '\0') {
					v = map[j].value;
					break;
				}

				++j;
			}
		} else if (**str < '0' || **str > '9') {
			// Note that "max" isn't supported while it is
			// supported in xz. It's not useful here.
			return N_("Value is not a non-negative "
					"decimal integer");
		} else {
			// strtoul() has locale-specific behavior so it cannot
			// be relied on to get reproducible results since we
			// cannot change the locale in a thread-safe library.
			// It also needs '\0'-termination.
			//
			// Use a temporary pointer so that *str will point
			// to the beginning of the value string in case
			// an error occurs.
			//
			// The first char is known to be a digit (see the
			// else-if above) so the do-while loop always
			// consumes at least one digit.
			const char *p = *str;
			v = 0;
			do {
				// Check for overflow before multiplying...
				if (v > UINT32_MAX / 10)
					return N_("Value out of range");

				v *= 10;

				// ...and before adding the new digit.
				const uint32_t add = (uint32_t)(*p - '0');
				if (UINT32_MAX - add < v)
					return N_("Value out of range");

				v += add;
				++p;
			} while (p < name_eq_value_end
					&& *p >= '0' && *p <= '9');

			if (p < name_eq_value_end) {
				// Remember this position so that it can be
				// used for error messages that are
				// specifically about the suffix. (Out of
				// range values are about the whole value
				// and those error messages point to the
				// beginning of the number part,
				// not to the suffix.)
				const char *multiplier_start = p;

				// If multiplier suffix shouldn't be used
				// then don't allow them even if the value
				// would stay within limits. This is a somewhat
				// unnecessary check but it rejects silly
				// things like lzma2:pb=0MiB which xz allows.
				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
						== 0) {
					*str = multiplier_start;
					return N_("This option does not "
						"support any multiplier "
						"suffixes");
				}

				uint32_t shift;

				switch (*p) {
				case 'k':
				case 'K':
					shift = 10;
					break;

				case 'm':
				case 'M':
					shift = 20;
					break;

				case 'g':
				case 'G':
					shift = 30;
					break;

				default:
					*str = multiplier_start;

					// TRANSLATORS: Don't translate the
					// suffixes "KiB", "MiB", or "GiB"
					// because a user can only specify
					// untranslated suffixes.
					return N_("Invalid multiplier suffix "
							"(KiB, MiB, or GiB)");
				}

				++p;

				// Allow "M", "Mi", "MB", "MiB" and the same
				// for the other five characters from the
				// switch-statement above. All are handled
				// as base-2 (perhaps a mistake, perhaps not).
				// Note that 'i' and 'B' are case sensitive.
				if (p < name_eq_value_end && *p == 'i')
					++p;

				if (p < name_eq_value_end && *p == 'B')
					++p;

				// Now we must have no chars remaining.
				if (p < name_eq_value_end) {
					*str = multiplier_start;
					return N_("Invalid multiplier suffix "
							"(KiB, MiB, or GiB)");
				}

				// Make sure that the shift won't
				// overflow uint32_t.
				if (v > (UINT32_MAX >> shift))
					return N_("Value out of range");

				v <<= shift;
			}

			if (v < optmap[i].u.range.min
					|| v > optmap[i].u.range.max)
				return N_("Value out of range");
		}

		// Set the value in filter_options. Enums are handled
		// specially since the underlying type isn't the same
		// as uint32_t on all systems.
		void *ptr = (char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			*(lzma_mode *)ptr = (lzma_mode)v;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
			break;

		default:
			*(uint32_t *)ptr = v;
			break;
		}

		// This option has been successfully handled.
		*str = name_eq_value_end;
	}

	// No errors.
	return NULL;
}


/// Finds the name of the filter at the beginning of the string and
/// calls filter_name_map[i].parse() to decode the filter-specific options.
/// The caller must have set str_end so that exactly one filter and its
/// options are present without any trailing characters.
static const char *
parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
		const lzma_allocator *allocator, bool only_xz)
{
	// Search for a colon or equals sign that would separate the filter
	// name from filter options. If neither is found, then the input
	// string only contains a filter name and there are no options.
	//
	// First assume that a colon or equals sign won't be found:
	const char *name_end = str_end;
	const char *opts_start = str_end;

	for (const char *p = *str; p < str_end; ++p) {
		if (*p == ':' || *p == '=') {
			name_end = p;

			// Filter options (name1=value1,name2=value2,...)
			// begin after the colon or equals sign.
			opts_start = p + 1;
			break;
		}
	}

	// Reject a too long filter name so that the memcmp()
	// in the loop below won't read past the end of the
	// string in filter_name_map[i].name.
	const size_t name_len = (size_t)(name_end - *str);
	if (name_len > NAME_LEN_MAX)
		return N_("Unknown filter name");

	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
		// The input isn't '\0'-terminated so compare name_len bytes
		// and then check that the name in the map ends there too.
		if (memcmp(*str, filter_name_map[i].name, name_len) == 0
				&& filter_name_map[i].name[name_len] == '\0') {
			if (only_xz && filter_name_map[i].id
					>= LZMA_FILTER_RESERVED_START)
				return N_("This filter cannot be used in "
						"the .xz format");

			// Allocate the filter-specific options and
			// initialize the memory with zeros.
			void *options = lzma_alloc_zero(
					filter_name_map[i].opts_size,
					allocator);
			if (options == NULL)
				return N_("Memory allocation failed");

			// Filter name was found so the input string is good
			// at least this far.
			*str = opts_start;

			const char *errmsg = filter_name_map[i].parse(
					str, str_end, options);
			if (errmsg != NULL) {
				// The options weren't stored anywhere yet
				// so they must be freed here.
				lzma_free(options, allocator);
				return errmsg;
			}

			// *filter is modified only when parsing is successful.
			filter->id = filter_name_map[i].id;
			filter->options = options;
			return NULL;
		}
	}

	return N_("Unknown filter name");
}


/// Converts the string to a filter chain (array of lzma_filter structures).
///
/// *str is advanced every time something has been decoded successfully.
/// This way the caller knows where in the string a possible error occurred.
///
/// The string is either a preset (which results in a chain that has only
/// LZMA2) or a list of filters. The caller-supplied filters array is
/// modified only if NULL (no error) is returned.
static const char *
str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
		const lzma_allocator *allocator)
{
	const char *errmsg;

	// Skip leading spaces.
	while (**str == ' ')
		++*str;

	if (**str == '\0')
		return N_("Empty string is not allowed, "
				"try '6' if a default value is needed");

	// Detect the type of the string.
	//
	// A string beginning with a digit or a string beginning with
	// one dash and a digit are treated as presets. Trailing spaces
	// will be ignored too (leading spaces were already ignored above).
	//
	// For example, "6", "7 ", "-9e", or "  -3  " are treated as presets.
	// Strings like "-" or "- " aren't preset.
#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
		if (**str == '-')
			++*str;

		// Ignore trailing spaces.
		const size_t str_len = strlen(*str);
		const char *str_end = memchr(*str, ' ', str_len);
		if (str_end != NULL) {
			// There is at least one trailing space. Check that
			// there are no chars other than spaces.
			for (size_t i = 1; str_end[i] != '\0'; ++i)
				if (str_end[i] != ' ')
					return N_("Unsupported preset");
		} else {
			// There are no trailing spaces. Use the whole string.
			str_end = *str + str_len;
		}

		uint32_t preset;
		errmsg = parse_lzma12_preset(str, str_end, &preset);
		if (errmsg != NULL)
			return errmsg;

		// No zeroing is requested here; lzma_lzma_preset()
		// is used to set the options.
		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
		if (opts == NULL)
			return N_("Memory allocation failed");

		if (lzma_lzma_preset(opts, preset)) {
			lzma_free(opts, allocator);
			return N_("Unsupported preset");
		}

		filters[0].id = LZMA_FILTER_LZMA2;
		filters[0].options = opts;
		filters[1].id = LZMA_VLI_UNKNOWN;
		filters[1].options = NULL;

		return NULL;
	}

	// Not a preset so it must be a filter chain.
	//
	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
	// can be used in .xz.
	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;

	// Use a temporary array so that we don't modify the caller-supplied
	// one until we know that no errors occurred.
	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];

	// Number of filters that have been successfully parsed. At
	// the "error" label it tells how many options structures
	// need to be freed.
	size_t i = 0;
	do {
		if (i == LZMA_FILTERS_MAX) {
			errmsg = N_("The maximum number of filters is four");
			goto error;
		}

		// Skip "--" if present.
		if ((*str)[0] == '-' && (*str)[1] == '-')
			*str += 2;

		// Locate the end of "filter:name1=value1,name2=value2",
		// stopping at the first "--" or a single space.
		const char *filter_end = *str;
		while (filter_end[0] != '\0') {
			if ((filter_end[0] == '-' && filter_end[1] == '-')
					|| filter_end[0] == ' ')
				break;

			++filter_end;
		}

		// Inputs that have "--" at the end or "-- " in the middle
		// will result in an empty filter name.
		if (filter_end == *str) {
			errmsg = N_("Filter name is missing");
			goto error;
		}

		errmsg = parse_filter(str, filter_end, &temp_filters[i],
				allocator, only_xz);
		if (errmsg != NULL)
			goto error;

		// Skip trailing spaces.
		while (**str == ' ')
			++*str;

		++i;
	} while (**str != '\0');

	// Seems to be good, terminate the array so that
	// basic validation can be done.
	temp_filters[i].id = LZMA_VLI_UNKNOWN;
	temp_filters[i].options = NULL;

	// Do basic validation if the application didn't prohibit it.
	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
		size_t dummy;
		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
		if (ret != LZMA_OK) {
			errmsg = N_("Invalid filter chain "
					"('lzma2' missing at the end?)");
			goto error;
		}
	}

	// All good. Copy the filters to the application supplied array.
	// The terminating element is copied too, thus i + 1.
	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
	return NULL;

error:
	// Free the filter options that were successfully decoded.
	while (i-- > 0)
		lzma_free(temp_filters[i].options, allocator);

	return errmsg;
}


// Public entry point that validates the arguments and calls
// str_to_filters(). Returns NULL on success or an error message on
// failure. If error_pos isn't NULL, *error_pos is set to how far into
// str the parsing got (clamped to INT_MAX).
extern LZMA_API(const char *)
lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	// If error_pos isn't NULL, *error_pos must always be set.
	// liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this
	// when str == NULL or filters == NULL or flags are unsupported.
	if (error_pos != NULL)
		*error_pos = 0;

	if (str == NULL || filters == NULL) {
		// Don't translate this because it's only shown in case of
		// a programming error.
		return "Unexpected NULL pointer argument(s) "
				"to lzma_str_to_filters()";
	}

	// Validate the flags.
	const uint32_t supported_flags
			= LZMA_STR_ALL_FILTERS
			| LZMA_STR_NO_VALIDATION;

	if (flags & ~supported_flags) {
		// This message is possible only if the caller uses flags
		// that are only supported in a newer liblzma version (or
		// the flags are simply buggy). Don't translate this at least
		// when liblzma itself doesn't use gettext; xz and liblzma
		// are usually upgraded at the same time.
		return "Unsupported flags to lzma_str_to_filters()";
	}

	// str_to_filters() advances "used" as it parses so that
	// the offset from str can be reported via *error_pos.
	const char *used = str;
	const char *errmsg = str_to_filters(&used, filters, flags, allocator);

	if (error_pos != NULL) {
		const size_t n = (size_t)(used - str);
		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
	}

	return errmsg;
}


/// Converts options of one filter to a string.
///
/// The caller must have already put the filter name in the destination
/// string. Since it is possible that no options will be needed, the caller
/// won't have put a delimiter character (':' or '=') in the string yet.
/// We will add it if at least one option will be added to the string.
///
/// Only the first optmap_count elements of optmap are processed.
static void
strfy_filter(lzma_str *dest, const char *delimiter,
		const option_map *optmap, size_t optmap_count,
		const void *filter_options)
{
	for (size_t i = 0; i < optmap_count; ++i) {
		// No attempt is made to reverse LZMA1/2 preset.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
			continue;

		// All options have integer values, some just are mapped
		// to a string with a name_value_map. LZMA1/2 preset
		// isn't reversed back to preset=PRESET form.
		uint32_t v;
		const void *ptr
			= (const char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			v = *(const lzma_mode *)ptr;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			v = *(const lzma_match_finder *)ptr;
			break;

		default:
			v = *(const uint32_t *)ptr;
			break;
		}

		// Skip this if this option should be omitted from
		// the string when the value is zero.
		if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
			continue;

		// Before the first option we add whatever delimiter
		// the caller gave us. For later options a comma is used.
		str_append_str(dest, delimiter);
		delimiter = ",";

		// Add the option name and equals sign.
		str_append_str(dest, optmap[i].name);
		str_append_str(dest, "=");

		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// Find the name that matches the integer value.
			// The map is terminated with an empty name.
			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				if (map[j].name[0] == '\0') {
					str_append_str(dest, "UNKNOWN");
					break;
				}

				if (map[j].value == v) {
					str_append_str(dest, map[j].name);
					break;
				}

				++j;
			}
		} else {
			str_append_u32(dest, v,
				optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
		}
	}

	return;
}


extern LZMA_API(lzma_ret)
lzma_str_from_filters(char **output_str, const lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	// On error *output_str is always set to NULL.
	// Do it as the very first step.
	if (output_str == NULL)
		return LZMA_PROG_ERROR;

	*output_str = NULL;

	if (filters == NULL)
		return LZMA_PROG_ERROR;

	// Validate the flags.
	const uint32_t supported_flags
			= LZMA_STR_ENCODER
			| LZMA_STR_DECODER
			| LZMA_STR_GETOPT_LONG
			| LZMA_STR_NO_SPACES;

	if (flags & ~supported_flags)
		return LZMA_OPTIONS_ERROR;

	// There must be at least one filter.
	if (filters[0].id == LZMA_VLI_UNKNOWN)
		return LZMA_OPTIONS_ERROR;

	// Allocate memory for the output string.
	lzma_str dest;
	return_if_error(str_init(&dest, allocator));

	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));

	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";

	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
		// If we reach LZMA_FILTERS_MAX, then the filters array
		// is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
		if (i == LZMA_FILTERS_MAX) {
			str_free(&dest, allocator);
			return LZMA_OPTIONS_ERROR;
		}

		// Don't add a space between filters if the caller
		// doesn't want them.
		if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
			str_append_str(&dest, " ");

		// Use dashes for xz getopt_long() compatible syntax but also
		// use dashes to separate filters when spaces weren't wanted.
		if ((flags & LZMA_STR_GETOPT_LONG)
				|| (i > 0 && (flags & LZMA_STR_NO_SPACES)))
			str_append_str(&dest, "--");

		size_t j = 0;
		while (true) {
			if (j == ARRAY_SIZE(filter_name_map)) {
				// Filter ID in filters[i].id isn't supported.
				str_free(&dest, allocator);
				return LZMA_OPTIONS_ERROR;
			}

			if (filter_name_map[j].id == filters[i].id) {
				// Add the filter name.
				str_append_str(&dest, filter_name_map[j].name);

				// If only the filter names were wanted then
				// skip to the next filter. In this case
				// .options is ignored and may be NULL even
				// when the filter doesn't allow NULL options.
				if (!show_opts)
					break;

				if (filters[i].options == NULL) {
					if (!filter_name_map[j].allow_null) {
						// Filter-specific options
						// are missing but with
						// this filter the options
						// structure is mandatory.
						str_free(&dest, allocator);
						return LZMA_OPTIONS_ERROR;
					}

					// .options is allowed to be NULL.
					// There is no need to add any
					// options to the string.
					break;
				}

				// Options structure is available. Add
				// the filter options to the string.
				const size_t optmap_count
					= (flags & LZMA_STR_ENCODER)
					?
filter_name_map[j].strfy_encoder 1231 : filter_name_map[j].strfy_decoder; 1232 strfy_filter(&dest, opt_delim, 1233 filter_name_map[j].optmap, 1234 optmap_count, 1235 filters[i].options); 1236 break; 1237 } 1238 1239 ++j; 1240 } 1241 } 1242 1243 return str_finish(output_str, &dest, allocator); 1244 } 1245 1246 1247 extern LZMA_API(lzma_ret) 1248 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, 1249 const lzma_allocator *allocator) 1250 { 1251 // On error *output_str is always set to NULL. 1252 // Do it as the very first step. 1253 if (output_str == NULL) 1254 return LZMA_PROG_ERROR; 1255 1256 *output_str = NULL; 1257 1258 // Validate the flags. 1259 const uint32_t supported_flags 1260 = LZMA_STR_ALL_FILTERS 1261 | LZMA_STR_ENCODER 1262 | LZMA_STR_DECODER 1263 | LZMA_STR_GETOPT_LONG; 1264 1265 if (flags & ~supported_flags) 1266 return LZMA_OPTIONS_ERROR; 1267 1268 // Allocate memory for the output string. 1269 lzma_str dest; 1270 return_if_error(str_init(&dest, allocator)); 1271 1272 // If only listing the filter names then separate them with spaces. 1273 // Otherwise use newlines. 1274 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); 1275 const char *filter_delim = show_opts ? "\n" : " "; 1276 1277 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; 1278 bool first_filter_printed = false; 1279 1280 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { 1281 // If we are printing only one filter then skip others. 1282 if (filter_id != LZMA_VLI_UNKNOWN 1283 && filter_id != filter_name_map[i].id) 1284 continue; 1285 1286 // If we are printing only .xz filters then skip the others. 1287 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START 1288 && (flags & LZMA_STR_ALL_FILTERS) == 0 1289 && filter_id == LZMA_VLI_UNKNOWN) 1290 continue; 1291 1292 // Add a new line if this isn't the first filter being 1293 // written to the string. 
1294 if (first_filter_printed) 1295 str_append_str(&dest, filter_delim); 1296 1297 first_filter_printed = true; 1298 1299 if (flags & LZMA_STR_GETOPT_LONG) 1300 str_append_str(&dest, "--"); 1301 1302 str_append_str(&dest, filter_name_map[i].name); 1303 1304 // If only the filter names were wanted then continue 1305 // to the next filter. 1306 if (!show_opts) 1307 continue; 1308 1309 const option_map *optmap = filter_name_map[i].optmap; 1310 const char *d = opt_delim; 1311 1312 const size_t end = (flags & LZMA_STR_ENCODER) 1313 ? filter_name_map[i].strfy_encoder 1314 : filter_name_map[i].strfy_decoder; 1315 1316 for (size_t j = 0; j < end; ++j) { 1317 // The first option is delimited from the filter 1318 // name using "=" or ":" and the rest of the options 1319 // are separated with ",". 1320 str_append_str(&dest, d); 1321 d = ","; 1322 1323 // optname=<possible_values> 1324 str_append_str(&dest, optmap[j].name); 1325 str_append_str(&dest, "=<"); 1326 1327 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { 1328 // LZMA1/2 preset has its custom help string. 1329 str_append_str(&dest, LZMA12_PRESET_STR); 1330 } else if (optmap[j].flags 1331 & OPTMAP_USE_NAME_VALUE_MAP) { 1332 // Separate the possible option values by "|". 1333 const name_value_map *m = optmap[j].u.map; 1334 for (size_t k = 0; m[k].name[0] != '\0'; ++k) { 1335 if (k > 0) 1336 str_append_str(&dest, "|"); 1337 1338 str_append_str(&dest, m[k].name); 1339 } 1340 } else { 1341 // Integer range is shown as min-max. 1342 const bool use_byte_suffix = optmap[j].flags 1343 & OPTMAP_USE_BYTE_SUFFIX; 1344 str_append_u32(&dest, optmap[j].u.range.min, 1345 use_byte_suffix); 1346 str_append_str(&dest, "-"); 1347 str_append_u32(&dest, optmap[j].u.range.max, 1348 use_byte_suffix); 1349 } 1350 1351 str_append_str(&dest, ">"); 1352 } 1353 } 1354 1355 // If no filters were added to the string then it must be because 1356 // the caller provided an unsupported Filter ID. 
1357 if (!first_filter_printed) { 1358 str_free(&dest, allocator); 1359 return LZMA_OPTIONS_ERROR; 1360 } 1361 1362 return str_finish(output_str, &dest, allocator); 1363 } 1364