xref: /freebsd/contrib/xz/src/liblzma/common/string_conversion.c (revision f126890ac5386406dadf7c4cfa9566cbb56537c5)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       string_conversion.c
6 /// \brief      Conversion of strings to filter chain and vice versa
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "filter_common.h"
13 
14 
15 /////////////////////
16 // String building //
17 /////////////////////
18 
/// How much memory to allocate for strings. For now, no realloc is used
/// so this needs to be big enough for the longest string that is built.
/// There is still an overflow check: str_append_str() stops copying at
/// STR_ALLOC_SIZE - 1 bytes and str_finish() treats a full buffer as
/// an error.
///
/// FIXME? Using a fixed size is wasteful if the application doesn't free
/// the string fairly quickly but this can be improved later if needed.
#define STR_ALLOC_SIZE 800
26 
27 
/// String builder that writes into a buffer of STR_ALLOC_SIZE bytes.
typedef struct {
	/// Buffer allocated by str_init(). It gets '\0'-terminated
	/// only when str_finish() succeeds.
	char *buf;

	/// Number of bytes that have been written to buf so far
	size_t pos;
} lzma_str;
32 
33 
34 static lzma_ret
35 str_init(lzma_str *str, const lzma_allocator *allocator)
36 {
37 	str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
38 	if (str->buf == NULL)
39 		return LZMA_MEM_ERROR;
40 
41 	str->pos = 0;
42 	return LZMA_OK;
43 }
44 
45 
46 static void
47 str_free(lzma_str *str, const lzma_allocator *allocator)
48 {
49 	lzma_free(str->buf, allocator);
50 	return;
51 }
52 
53 
54 static bool
55 str_is_full(const lzma_str *str)
56 {
57 	return str->pos == STR_ALLOC_SIZE - 1;
58 }
59 
60 
61 static lzma_ret
62 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
63 {
64 	if (str_is_full(str)) {
65 		// The preallocated buffer was too small.
66 		// This shouldn't happen as STR_ALLOC_SIZE should
67 		// be adjusted if new filters are added.
68 		lzma_free(str->buf, allocator);
69 		*dest = NULL;
70 		assert(0);
71 		return LZMA_PROG_ERROR;
72 	}
73 
74 	str->buf[str->pos] = '\0';
75 	*dest = str->buf;
76 	return LZMA_OK;
77 }
78 
79 
80 static void
81 str_append_str(lzma_str *str, const char *s)
82 {
83 	const size_t len = strlen(s);
84 	const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
85 	const size_t copy_size = my_min(len, limit);
86 
87 	memcpy(str->buf + str->pos, s, copy_size);
88 	str->pos += copy_size;
89 	return;
90 }
91 
92 
93 static void
94 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
95 {
96 	if (v == 0) {
97 		str_append_str(str, "0");
98 	} else {
99 		// NOTE: Don't use plain "B" because xz and the parser in this
100 		// file don't support it and at glance it may look like 8
101 		// (there cannot be a space before the suffix).
102 		static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
103 
104 		size_t suf = 0;
105 		if (use_byte_suffix) {
106 			while ((v & 1023) == 0
107 					&& suf < ARRAY_SIZE(suffixes) - 1) {
108 				v >>= 10;
109 				++suf;
110 			}
111 		}
112 
113 		// UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
114 		// that initializing to "" initializes all elements to
115 		// zero so '\0'-termination gets handled by this.
116 		char buf[16] = "";
117 		size_t pos = sizeof(buf) - 1;
118 
119 		do {
120 			buf[--pos] = '0' + (v % 10);
121 			v /= 10;
122 		} while (v != 0);
123 
124 		str_append_str(str, buf + pos);
125 		str_append_str(str, suffixes[suf]);
126 	}
127 
128 	return;
129 }
130 
131 
132 //////////////////////////////////////////////
133 // Parsing and stringification declarations //
134 //////////////////////////////////////////////
135 
/// Maximum length for filter and option names.
/// In name_value_map this makes the structure size a round number:
/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
#define NAME_LEN_MAX 11


/// For option_map.flags: Use .u.map to convert the input value
/// to an integer. Without this flag, .u.range.{min,max} are used
/// as the allowed range for the integer.
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
/// the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04

/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
	/// The option is stored as uint32_t.
	OPTMAP_TYPE_UINT32,

	/// The option is stored as lzma_mode.
	OPTMAP_TYPE_LZMA_MODE,

	/// The option is stored as lzma_match_finder.
	OPTMAP_TYPE_LZMA_MATCH_FINDER,

	/// The option is a LZMA1/2 preset string which has
	/// its own parsing function and is never stringified.
	OPTMAP_TYPE_LZMA_PRESET,
};
165 
166 
/// This is for mapping string values in options to integers.
/// The last element of an array must have "" as the name.
/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	/// String form of the value; "" marks the end of the array.
	const char name[NAME_LEN_MAX + 1];

	/// Integer that corresponds to the name
	const uint32_t value;
} name_value_map;
174 
175 
176 /// Each filter that has options needs an array of option_map structures.
177 /// The array doesn't need to be terminated as the functions take the
178 /// length of the array as an argument.
179 ///
180 /// When converting a string to filter options structure, option values
181 /// will be handled in a few different ways:
182 ///
183 /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
184 ///     is handled specially.
185 ///
186 /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
187 ///     converted to an integer using the name_value_map pointed by .u.map.
188 ///     The last element in .u.map must have .name = "" as the terminator.
189 ///
190 /// (3) Otherwise the string is treated as a non-negative unsigned decimal
191 ///     integer which must be in the range set in .u.range. If .flags has
192 ///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
193 ///
194 /// The integer value from (2) or (3) is then stored to filter_options
195 /// at the offset specified in .offset using the type specified in .type
196 /// (default is uint32_t).
197 ///
198 /// Stringifying a filter is done by processing a given number of options
199 /// in order from the beginning of an option_map array. The integer is
200 /// read from filter_options at .offset using the type from .type.
201 ///
202 /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
203 /// option is skipped.
204 ///
205 /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
206 /// to convert the option to a string. If the map doesn't contain a string
207 /// for the integer value then "UNKNOWN" is used.
208 ///
209 /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
210 /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
211 /// MiB, or GiB suffix is used if the value is an exact multiple of these.
212 /// Plain "B" suffix is never used.
typedef struct {
	/// Name of the option as it appears in the string
	char name[NAME_LEN_MAX + 1];

	/// One of the OPTMAP_TYPE_* values; selects the type used to
	/// access the value in the filter options structure.
	uint8_t type;

	/// Bitwise-or of zero or more OPTMAP_* flags
	uint8_t flags;

	/// Offset of the value from the beginning of
	/// the filter options structure
	uint16_t offset;

	/// Which member is valid depends on whether .flags has
	/// OPTMAP_USE_NAME_VALUE_MAP set.
	union {
		/// Allowed range (inclusive) of a plain integer value
		struct {
			uint32_t min;
			uint32_t max;
		} range;

		/// Name-to-integer map terminated with an empty name
		const name_value_map *map;
	} u;
} option_map;
228 
229 
// Forward declaration: the filter-specific parse_* functions below call
// parse_options() which is defined later in this file.
static const char *parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size);
233 
234 
235 /////////
236 // BCJ //
237 /////////
238 
239 #if defined(HAVE_ENCODER_X86) \
240 		|| defined(HAVE_DECODER_X86) \
241 		|| defined(HAVE_ENCODER_ARM) \
242 		|| defined(HAVE_DECODER_ARM) \
243 		|| defined(HAVE_ENCODER_ARMTHUMB) \
244 		|| defined(HAVE_DECODER_ARMTHUMB) \
245 		|| defined(HAVE_ENCODER_ARM64) \
246 		|| defined(HAVE_DECODER_ARM64) \
247 		|| defined(HAVE_ENCODER_POWERPC) \
248 		|| defined(HAVE_DECODER_POWERPC) \
249 		|| defined(HAVE_ENCODER_IA64) \
250 		|| defined(HAVE_DECODER_IA64) \
251 		|| defined(HAVE_ENCODER_SPARC) \
252 		|| defined(HAVE_DECODER_SPARC) \
253 		|| defined(HAVE_ENCODER_RISCV) \
254 		|| defined(HAVE_DECODER_RISCV)
255 static const option_map bcj_optmap[] = {
256 	{
257 		.name = "start",
258 		.flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
259 		.offset = offsetof(lzma_options_bcj, start_offset),
260 		.u.range.min = 0,
261 		.u.range.max = UINT32_MAX,
262 	}
263 };
264 
265 
266 static const char *
267 parse_bcj(const char **const str, const char *str_end, void *filter_options)
268 {
269 	// filter_options was zeroed on allocation and that is enough
270 	// for the default value.
271 	return parse_options(str, str_end, filter_options,
272 			bcj_optmap, ARRAY_SIZE(bcj_optmap));
273 }
274 #endif
275 
276 
277 ///////////
278 // Delta //
279 ///////////
280 
281 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
282 static const option_map delta_optmap[] = {
283 	{
284 		.name = "dist",
285 		.offset = offsetof(lzma_options_delta, dist),
286 		.u.range.min = LZMA_DELTA_DIST_MIN,
287 		.u.range.max = LZMA_DELTA_DIST_MAX,
288 	}
289 };
290 
291 
292 static const char *
293 parse_delta(const char **const str, const char *str_end, void *filter_options)
294 {
295 	lzma_options_delta *opts = filter_options;
296 	opts->type = LZMA_DELTA_TYPE_BYTE;
297 	opts->dist = LZMA_DELTA_DIST_MIN;
298 
299 	return parse_options(str, str_end, filter_options,
300 			delta_optmap, ARRAY_SIZE(delta_optmap));
301 }
302 #endif
303 
304 
305 ///////////////////
306 // LZMA1 & LZMA2 //
307 ///////////////////
308 
/// Help string for presets. This must be kept in sync with the flag
/// characters accepted by parse_lzma12_preset().
#define LZMA12_PRESET_STR "0-9[e]"
311 
312 
313 static const char *
314 parse_lzma12_preset(const char **const str, const char *str_end,
315 		uint32_t *preset)
316 {
317 	assert(*str < str_end);
318 	*preset = (uint32_t)(**str - '0');
319 
320 	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
321 	while (++*str < str_end) {
322 		switch (**str) {
323 		case 'e':
324 			*preset |= LZMA_PRESET_EXTREME;
325 			break;
326 
327 		default:
328 			return "Unsupported preset flag";
329 		}
330 	}
331 
332 	return NULL;
333 }
334 
335 
336 static const char *
337 set_lzma12_preset(const char **const str, const char *str_end,
338 		void *filter_options)
339 {
340 	uint32_t preset;
341 	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
342 	if (errmsg != NULL)
343 		return errmsg;
344 
345 	lzma_options_lzma *opts = filter_options;
346 	if (lzma_lzma_preset(opts, preset))
347 		return "Unsupported preset";
348 
349 	return NULL;
350 }
351 
352 
353 static const name_value_map lzma12_mode_map[] = {
354 	{ "fast",   LZMA_MODE_FAST },
355 	{ "normal", LZMA_MODE_NORMAL },
356 	{ "",       0 }
357 };
358 
359 
360 static const name_value_map lzma12_mf_map[] = {
361 	{ "hc3", LZMA_MF_HC3 },
362 	{ "hc4", LZMA_MF_HC4 },
363 	{ "bt2", LZMA_MF_BT2 },
364 	{ "bt3", LZMA_MF_BT3 },
365 	{ "bt4", LZMA_MF_BT4 },
366 	{ "",    0 }
367 };
368 
369 
370 static const option_map lzma12_optmap[] = {
371 	{
372 		.name = "preset",
373 		.type = OPTMAP_TYPE_LZMA_PRESET,
374 	}, {
375 		.name = "dict",
376 		.flags = OPTMAP_USE_BYTE_SUFFIX,
377 		.offset = offsetof(lzma_options_lzma, dict_size),
378 		.u.range.min = LZMA_DICT_SIZE_MIN,
379 		// FIXME? The max is really max for encoding but decoding
380 		// would allow 4 GiB - 1 B.
381 		.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
382 	}, {
383 		.name = "lc",
384 		.offset = offsetof(lzma_options_lzma, lc),
385 		.u.range.min = LZMA_LCLP_MIN,
386 		.u.range.max = LZMA_LCLP_MAX,
387 	}, {
388 		.name = "lp",
389 		.offset = offsetof(lzma_options_lzma, lp),
390 		.u.range.min = LZMA_LCLP_MIN,
391 		.u.range.max = LZMA_LCLP_MAX,
392 	}, {
393 		.name = "pb",
394 		.offset = offsetof(lzma_options_lzma, pb),
395 		.u.range.min = LZMA_PB_MIN,
396 		.u.range.max = LZMA_PB_MAX,
397 	}, {
398 		.name = "mode",
399 		.type = OPTMAP_TYPE_LZMA_MODE,
400 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
401 		.offset = offsetof(lzma_options_lzma, mode),
402 		.u.map = lzma12_mode_map,
403 	}, {
404 		.name = "nice",
405 		.offset = offsetof(lzma_options_lzma, nice_len),
406 		.u.range.min = 2,
407 		.u.range.max = 273,
408 	}, {
409 		.name = "mf",
410 		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
411 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
412 		.offset = offsetof(lzma_options_lzma, mf),
413 		.u.map = lzma12_mf_map,
414 	}, {
415 		.name = "depth",
416 		.offset = offsetof(lzma_options_lzma, depth),
417 		.u.range.min = 0,
418 		.u.range.max = UINT32_MAX,
419 	}
420 };
421 
422 
423 static const char *
424 parse_lzma12(const char **const str, const char *str_end, void *filter_options)
425 {
426 	lzma_options_lzma *opts = filter_options;
427 
428 	// It cannot fail.
429 	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
430 	assert(!preset_ret);
431 	(void)preset_ret;
432 
433 	const char *errmsg = parse_options(str, str_end, filter_options,
434 			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
435 	if (errmsg != NULL)
436 		return errmsg;
437 
438 	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
439 		return "The sum of lc and lp must not exceed 4";
440 
441 	return NULL;
442 }
443 
444 
445 /////////////////////////////////////////
446 // Generic parsing and stringification //
447 /////////////////////////////////////////
448 
449 static const struct {
450 	/// Name of the filter
451 	char name[NAME_LEN_MAX + 1];
452 
453 	/// For lzma_str_to_filters:
454 	/// Size of the filter-specific options structure.
455 	uint32_t opts_size;
456 
457 	/// Filter ID
458 	lzma_vli id;
459 
460 	/// For lzma_str_to_filters:
461 	/// Function to parse the filter-specific options. The filter_options
462 	/// will already have been allocated using lzma_alloc_zero().
463 	const char *(*parse)(const char **str, const char *str_end,
464 			void *filter_options);
465 
466 	/// For lzma_str_from_filters:
467 	/// If the flag LZMA_STR_ENCODER is used then the first
468 	/// strfy_encoder elements of optmap are stringified.
469 	/// With LZMA_STR_DECODER strfy_decoder is used.
470 	/// Currently encoders use all options that decoders do but if
471 	/// that changes then this needs to be changed too, for example,
472 	/// add a new OPTMAP flag to skip printing some decoder-only options.
473 	const option_map *optmap;
474 	uint8_t strfy_encoder;
475 	uint8_t strfy_decoder;
476 
477 	/// For lzma_str_from_filters:
478 	/// If true, lzma_filter.options is allowed to be NULL. In that case,
479 	/// only the filter name is printed without any options.
480 	bool allow_null;
481 
482 } filter_name_map[] = {
483 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
484 	{ "lzma1",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA1,
485 	  &parse_lzma12,  lzma12_optmap, 9, 5, false },
486 #endif
487 
488 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
489 	{ "lzma2",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA2,
490 	  &parse_lzma12,  lzma12_optmap, 9, 2, false },
491 #endif
492 
493 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
494 	{ "x86",          sizeof(lzma_options_bcj),   LZMA_FILTER_X86,
495 	  &parse_bcj,     bcj_optmap, 1, 1, true },
496 #endif
497 
498 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
499 	{ "arm",          sizeof(lzma_options_bcj),   LZMA_FILTER_ARM,
500 	  &parse_bcj,     bcj_optmap, 1, 1, true },
501 #endif
502 
503 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
504 	{ "armthumb",     sizeof(lzma_options_bcj),   LZMA_FILTER_ARMTHUMB,
505 	  &parse_bcj,     bcj_optmap, 1, 1, true },
506 #endif
507 
508 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
509 	{ "arm64",        sizeof(lzma_options_bcj),   LZMA_FILTER_ARM64,
510 	  &parse_bcj,     bcj_optmap, 1, 1, true },
511 #endif
512 
513 #if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
514 	{ "riscv",        sizeof(lzma_options_bcj),   LZMA_FILTER_RISCV,
515 	  &parse_bcj,     bcj_optmap, 1, 1, true },
516 #endif
517 
518 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
519 	{ "powerpc",      sizeof(lzma_options_bcj),   LZMA_FILTER_POWERPC,
520 	  &parse_bcj,     bcj_optmap, 1, 1, true },
521 #endif
522 
523 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
524 	{ "ia64",         sizeof(lzma_options_bcj),   LZMA_FILTER_IA64,
525 	  &parse_bcj,     bcj_optmap, 1, 1, true },
526 #endif
527 
528 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
529 	{ "sparc",        sizeof(lzma_options_bcj),   LZMA_FILTER_SPARC,
530 	  &parse_bcj,     bcj_optmap, 1, 1, true },
531 #endif
532 
533 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
534 	{ "delta",        sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
535 	  &parse_delta,   delta_optmap, 1, 1, false },
536 #endif
537 };
538 
539 
/// Decodes options from a string for one filter (name1=value1,name2=value2).
/// Caller must have allocated memory for filter_options already and set
/// the initial default values. This is called from the filter-specific
/// parse_* functions.
///
/// The input string starts at *str and the address in str_end is the first
/// char that is not part of the string anymore. So no '\0' terminator is
/// used. *str is advanced every time something has been decoded successfully.
///
/// \param      str             Pointer to the current position in the
///                             input string. On error it is left at
///                             the position the error message is about.
/// \param      str_end         First char that isn't part of the input
/// \param      filter_options  Filter-specific options structure where
///                             the decoded values are stored
/// \param      optmap          Options supported by the filter
/// \param      optmap_size     Number of elements in optmap
///
/// \return     NULL on success, otherwise an error message string.
static const char *
parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size)
{
	while (*str < str_end && **str != '\0') {
		// Each option is of the form name=value.
		// Commas (',') separate options. Extra commas are ignored.
		// Ignoring extra commas makes it simpler if an optional
		// option is stored in a shell variable which can be empty.
		if (**str == ',') {
			++*str;
			continue;
		}

		// Find where the next name=value ends.
		const size_t str_len = (size_t)(str_end - *str);
		const char *name_eq_value_end = memchr(*str, ',', str_len);
		if (name_eq_value_end == NULL)
			name_eq_value_end = str_end;

		const char *equals_sign = memchr(*str, '=',
				(size_t)(name_eq_value_end - *str));

		// Fail if the '=' wasn't found or the option name is missing
		// (the first char is '=').
		if (equals_sign == NULL || **str == '=')
			return "Options must be 'name=value' pairs separated "
					"with commas";

		// Reject a too long option name so that the memcmp()
		// in the loop below won't read past the end of the
		// string in optmap[i].name.
		const size_t name_len = (size_t)(equals_sign - *str);
		if (name_len > NAME_LEN_MAX)
			return "Unknown option name";

		// Find the option name from optmap[].
		size_t i = 0;
		while (true) {
			if (i == optmap_size)
				return "Unknown option name";

			if (memcmp(*str, optmap[i].name, name_len) == 0
					&& optmap[i].name[name_len] == '\0')
				break;

			++i;
		}

		// The input string is good at least until the start of
		// the option value.
		*str = equals_sign + 1;

		// The code assumes that the option value isn't an empty
		// string so check it here.
		const size_t value_len = (size_t)(name_eq_value_end - *str);
		if (value_len == 0)
			return "Option value cannot be empty";

		// LZMA1/2 preset has its own parsing function.
		// On success it has advanced *str to name_eq_value_end.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
			const char *errmsg = set_lzma12_preset(str,
					name_eq_value_end, filter_options);
			if (errmsg != NULL)
				return errmsg;

			continue;
		}

		// It's an integer value.
		uint32_t v;
		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// The integer is picked from a string-to-integer map.
			//
			// Reject a too long value string so that the memcmp()
			// in the loop below won't read past the end of the
			// string in optmap[i].u.map[j].name.
			if (value_len > NAME_LEN_MAX)
				return "Invalid option value";

			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0')
					return "Invalid option value";

				if (memcmp(*str, map[j].name, value_len) == 0
						&& map[j].name[value_len]
							== '\0') {
					v = map[j].value;
					break;
				}

				++j;
			}
		} else if (**str < '0' || **str > '9') {
			// Note that "max" isn't supported while it is
			// supported in xz. It's not useful here.
			return "Value is not a non-negative decimal integer";
		} else {
			// strtoul() has locale-specific behavior so it cannot
			// be relied on to get reproducible results since we
			// cannot change the locale in a thread-safe library.
			// It also needs '\0'-termination.
			//
			// Use a temporary pointer so that *str will point
			// to the beginning of the value string in case
			// an error occurs.
			const char *p = *str;
			v = 0;
			do {
				// Both checks detect uint32_t overflow
				// before it can happen.
				if (v > UINT32_MAX / 10)
					return "Value out of range";

				v *= 10;

				const uint32_t add = (uint32_t)(*p - '0');
				if (UINT32_MAX - add < v)
					return "Value out of range";

				v += add;
				++p;
			} while (p < name_eq_value_end
					&& *p >= '0' && *p <= '9');

			if (p < name_eq_value_end) {
				// Remember this position so that it can be
				// used for error messages that are
				// specifically about the suffix. (Out of
				// range values are about the whole value
				// and those error messages point to the
				// beginning of the number part,
				// not to the suffix.)
				const char *multiplier_start = p;

				// If multiplier suffix shouldn't be used
				// then don't allow them even if the value
				// would stay within limits. This is a somewhat
				// unnecessary check but it rejects silly
				// things like lzma2:pb=0MiB which xz allows.
				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
						== 0) {
					*str = multiplier_start;
					return "This option does not support "
						"any integer suffixes";
				}

				uint32_t shift;

				switch (*p) {
				case 'k':
				case 'K':
					shift = 10;
					break;

				case 'm':
				case 'M':
					shift = 20;
					break;

				case 'g':
				case 'G':
					shift = 30;
					break;

				default:
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				++p;

				// Allow "M", "Mi", "MB", "MiB" and the same
				// for the other five characters from the
				// switch-statement above. All are handled
				// as base-2 (perhaps a mistake, perhaps not).
				// Note that 'i' and 'B' are case sensitive.
				if (p < name_eq_value_end && *p == 'i')
					++p;

				if (p < name_eq_value_end && *p == 'B')
					++p;

				// Now we must have no chars remaining.
				if (p < name_eq_value_end) {
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				// Check that the shift won't lose any bits.
				if (v > (UINT32_MAX >> shift))
					return "Value out of range";

				v <<= shift;
			}

			if (v < optmap[i].u.range.min
					|| v > optmap[i].u.range.max)
				return "Value out of range";
		}

		// Set the value in filter_options. Enums are handled
		// specially since the underlying type isn't the same
		// as uint32_t on all systems.
		void *ptr = (char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			*(lzma_mode *)ptr = (lzma_mode)v;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
			break;

		default:
			*(uint32_t *)ptr = v;
			break;
		}

		// This option has been successfully handled.
		*str = name_eq_value_end;
	}

	// No errors.
	return NULL;
}
777 
778 
779 /// Finds the name of the filter at the beginning of the string and
780 /// calls filter_name_map[i].parse() to decode the filter-specific options.
781 /// The caller must have set str_end so that exactly one filter and its
782 /// options are present without any trailing characters.
783 static const char *
784 parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
785 		const lzma_allocator *allocator, bool only_xz)
786 {
787 	// Search for a colon or equals sign that would separate the filter
788 	// name from filter options. If neither is found, then the input
789 	// string only contains a filter name and there are no options.
790 	//
791 	// First assume that a colon or equals sign won't be found:
792 	const char *name_end = str_end;
793 	const char *opts_start = str_end;
794 
795 	for (const char *p = *str; p < str_end; ++p) {
796 		if (*p == ':' || *p == '=') {
797 			name_end = p;
798 
799 			// Filter options (name1=value1,name2=value2,...)
800 			// begin after the colon or equals sign.
801 			opts_start = p + 1;
802 			break;
803 		}
804 	}
805 
806 	// Reject a too long filter name so that the memcmp()
807 	// in the loop below won't read past the end of the
808 	// string in filter_name_map[i].name.
809 	const size_t name_len = (size_t)(name_end - *str);
810 	if (name_len > NAME_LEN_MAX)
811 		return "Unknown filter name";
812 
813 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
814 		if (memcmp(*str, filter_name_map[i].name, name_len) == 0
815 				&& filter_name_map[i].name[name_len] == '\0') {
816 			if (only_xz && filter_name_map[i].id
817 					>= LZMA_FILTER_RESERVED_START)
818 				return "This filter cannot be used in "
819 						"the .xz format";
820 
821 			// Allocate the filter-specific options and
822 			// initialize the memory with zeros.
823 			void *options = lzma_alloc_zero(
824 					filter_name_map[i].opts_size,
825 					allocator);
826 			if (options == NULL)
827 				return "Memory allocation failed";
828 
829 			// Filter name was found so the input string is good
830 			// at least this far.
831 			*str = opts_start;
832 
833 			const char *errmsg = filter_name_map[i].parse(
834 					str, str_end, options);
835 			if (errmsg != NULL) {
836 				lzma_free(options, allocator);
837 				return errmsg;
838 			}
839 
840 			// *filter is modified only when parsing is successful.
841 			filter->id = filter_name_map[i].id;
842 			filter->options = options;
843 			return NULL;
844 		}
845 	}
846 
847 	return "Unknown filter name";
848 }
849 
850 
851 /// Converts the string to a filter chain (array of lzma_filter structures).
852 ///
853 /// *str is advanced every time something has been decoded successfully.
854 /// This way the caller knows where in the string a possible error occurred.
855 static const char *
856 str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
857 		const lzma_allocator *allocator)
858 {
859 	const char *errmsg;
860 
861 	// Skip leading spaces.
862 	while (**str == ' ')
863 		++*str;
864 
865 	if (**str == '\0')
866 		return "Empty string is not allowed, "
867 				"try \"6\" if a default value is needed";
868 
869 	// Detect the type of the string.
870 	//
871 	// A string beginning with a digit or a string beginning with
872 	// one dash and a digit are treated as presets. Trailing spaces
873 	// will be ignored too (leading spaces were already ignored above).
874 	//
875 	// For example, "6", "7  ", "-9e", or "  -3  " are treated as presets.
876 	// Strings like "-" or "- " aren't preset.
877 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
878 	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
879 		if (**str == '-')
880 			++*str;
881 
882 		// Ignore trailing spaces.
883 		const size_t str_len = strlen(*str);
884 		const char *str_end = memchr(*str, ' ', str_len);
885 		if (str_end != NULL) {
886 			// There is at least one trailing space. Check that
887 			// there are no chars other than spaces.
888 			for (size_t i = 1; str_end[i] != '\0'; ++i)
889 				if (str_end[i] != ' ')
890 					return "Unsupported preset";
891 		} else {
892 			// There are no trailing spaces. Use the whole string.
893 			str_end = *str + str_len;
894 		}
895 
896 		uint32_t preset;
897 		errmsg = parse_lzma12_preset(str, str_end, &preset);
898 		if (errmsg != NULL)
899 			return errmsg;
900 
901 		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
902 		if (opts == NULL)
903 			return "Memory allocation failed";
904 
905 		if (lzma_lzma_preset(opts, preset)) {
906 			lzma_free(opts, allocator);
907 			return "Unsupported preset";
908 		}
909 
910 		filters[0].id = LZMA_FILTER_LZMA2;
911 		filters[0].options = opts;
912 		filters[1].id = LZMA_VLI_UNKNOWN;
913 		filters[1].options = NULL;
914 
915 		return NULL;
916 	}
917 
918 	// Not a preset so it must be a filter chain.
919 	//
920 	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
921 	// can be used in .xz.
922 	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
923 
924 	// Use a temporary array so that we don't modify the caller-supplied
925 	// one until we know that no errors occurred.
926 	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
927 
928 	size_t i = 0;
929 	do {
930 		if (i == LZMA_FILTERS_MAX) {
931 			errmsg = "The maximum number of filters is four";
932 			goto error;
933 		}
934 
935 		// Skip "--" if present.
936 		if ((*str)[0] == '-' && (*str)[1] == '-')
937 			*str += 2;
938 
939 		// Locate the end of "filter:name1=value1,name2=value2",
940 		// stopping at the first "--" or a single space.
941 		const char *filter_end = *str;
942 		while (filter_end[0] != '\0') {
943 			if ((filter_end[0] == '-' && filter_end[1] == '-')
944 					|| filter_end[0] == ' ')
945 				break;
946 
947 			++filter_end;
948 		}
949 
950 		// Inputs that have "--" at the end or "-- " in the middle
951 		// will result in an empty filter name.
952 		if (filter_end == *str) {
953 			errmsg = "Filter name is missing";
954 			goto error;
955 		}
956 
957 		errmsg = parse_filter(str, filter_end, &temp_filters[i],
958 				allocator, only_xz);
959 		if (errmsg != NULL)
960 			goto error;
961 
962 		// Skip trailing spaces.
963 		while (**str == ' ')
964 			++*str;
965 
966 		++i;
967 	} while (**str != '\0');
968 
969 	// Seems to be good, terminate the array so that
970 	// basic validation can be done.
971 	temp_filters[i].id = LZMA_VLI_UNKNOWN;
972 	temp_filters[i].options = NULL;
973 
974 	// Do basic validation if the application didn't prohibit it.
975 	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
976 		size_t dummy;
977 		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
978 		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
979 		if (ret != LZMA_OK) {
980 			errmsg = "Invalid filter chain "
981 					"('lzma2' missing at the end?)";
982 			goto error;
983 		}
984 	}
985 
986 	// All good. Copy the filters to the application supplied array.
987 	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
988 	return NULL;
989 
990 error:
991 	// Free the filter options that were successfully decoded.
992 	while (i-- > 0)
993 		lzma_free(temp_filters[i].options, allocator);
994 
995 	return errmsg;
996 }
997 
998 
999 extern LZMA_API(const char *)
1000 lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
1001 		uint32_t flags, const lzma_allocator *allocator)
1002 {
1003 	if (str == NULL || filters == NULL)
1004 		return "Unexpected NULL pointer argument(s) "
1005 				"to lzma_str_to_filters()";
1006 
1007 	// Validate the flags.
1008 	const uint32_t supported_flags
1009 			= LZMA_STR_ALL_FILTERS
1010 			| LZMA_STR_NO_VALIDATION;
1011 
1012 	if (flags & ~supported_flags)
1013 		return "Unsupported flags to lzma_str_to_filters()";
1014 
1015 	const char *used = str;
1016 	const char *errmsg = str_to_filters(&used, filters, flags, allocator);
1017 
1018 	if (error_pos != NULL) {
1019 		const size_t n = (size_t)(used - str);
1020 		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
1021 	}
1022 
1023 	return errmsg;
1024 }
1025 
1026 
1027 /// Converts options of one filter to a string.
1028 ///
1029 /// The caller must have already put the filter name in the destination
1030 /// string. Since it is possible that no options will be needed, the caller
1031 /// won't have put a delimiter character (':' or '=') in the string yet.
1032 /// We will add it if at least one option will be added to the string.
1033 static void
1034 strfy_filter(lzma_str *dest, const char *delimiter,
1035 		const option_map *optmap, size_t optmap_count,
1036 		const void *filter_options)
1037 {
1038 	for (size_t i = 0; i < optmap_count; ++i) {
1039 		// No attempt is made to reverse LZMA1/2 preset.
1040 		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
1041 			continue;
1042 
1043 		// All options have integer values, some just are mapped
1044 		// to a string with a name_value_map. LZMA1/2 preset
1045 		// isn't reversed back to preset=PRESET form.
1046 		uint32_t v;
1047 		const void *ptr
1048 			= (const char *)filter_options + optmap[i].offset;
1049 		switch (optmap[i].type) {
1050 			case OPTMAP_TYPE_LZMA_MODE:
1051 				v = *(const lzma_mode *)ptr;
1052 				break;
1053 
1054 			case OPTMAP_TYPE_LZMA_MATCH_FINDER:
1055 				v = *(const lzma_match_finder *)ptr;
1056 				break;
1057 
1058 			default:
1059 				v = *(const uint32_t *)ptr;
1060 				break;
1061 		}
1062 
1063 		// Skip this if this option should be omitted from
1064 		// the string when the value is zero.
1065 		if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
1066 			continue;
1067 
1068 		// Before the first option we add whatever delimiter
1069 		// the caller gave us. For later options a comma is used.
1070 		str_append_str(dest, delimiter);
1071 		delimiter = ",";
1072 
1073 		// Add the option name and equals sign.
1074 		str_append_str(dest, optmap[i].name);
1075 		str_append_str(dest, "=");
1076 
1077 		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
1078 			const name_value_map *map = optmap[i].u.map;
1079 			size_t j = 0;
1080 			while (true) {
1081 				if (map[j].name[0] == '\0') {
1082 					str_append_str(dest, "UNKNOWN");
1083 					break;
1084 				}
1085 
1086 				if (map[j].value == v) {
1087 					str_append_str(dest, map[j].name);
1088 					break;
1089 				}
1090 
1091 				++j;
1092 			}
1093 		} else {
1094 			str_append_u32(dest, v,
1095 				optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
1096 		}
1097 	}
1098 
1099 	return;
1100 }
1101 
1102 
1103 extern LZMA_API(lzma_ret)
1104 lzma_str_from_filters(char **output_str, const lzma_filter *filters,
1105 		uint32_t flags, const lzma_allocator *allocator)
1106 {
1107 	// On error *output_str is always set to NULL.
1108 	// Do it as the very first step.
1109 	if (output_str == NULL)
1110 		return LZMA_PROG_ERROR;
1111 
1112 	*output_str = NULL;
1113 
1114 	if (filters == NULL)
1115 		return LZMA_PROG_ERROR;
1116 
1117 	// Validate the flags.
1118 	const uint32_t supported_flags
1119 			= LZMA_STR_ENCODER
1120 			| LZMA_STR_DECODER
1121 			| LZMA_STR_GETOPT_LONG
1122 			| LZMA_STR_NO_SPACES;
1123 
1124 	if (flags & ~supported_flags)
1125 		return LZMA_OPTIONS_ERROR;
1126 
1127 	// There must be at least one filter.
1128 	if (filters[0].id == LZMA_VLI_UNKNOWN)
1129 		return LZMA_OPTIONS_ERROR;
1130 
1131 	// Allocate memory for the output string.
1132 	lzma_str dest;
1133 	return_if_error(str_init(&dest, allocator));
1134 
1135 	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1136 
1137 	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1138 
1139 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
1140 		// If we reach LZMA_FILTERS_MAX, then the filters array
1141 		// is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
1142 		if (i == LZMA_FILTERS_MAX) {
1143 			str_free(&dest, allocator);
1144 			return LZMA_OPTIONS_ERROR;
1145 		}
1146 
1147 		// Don't add a space between filters if the caller
1148 		// doesn't want them.
1149 		if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
1150 			str_append_str(&dest, " ");
1151 
1152 		// Use dashes for xz getopt_long() compatible syntax but also
1153 		// use dashes to separate filters when spaces weren't wanted.
1154 		if ((flags & LZMA_STR_GETOPT_LONG)
1155 				|| (i > 0 && (flags & LZMA_STR_NO_SPACES)))
1156 			str_append_str(&dest, "--");
1157 
1158 		size_t j = 0;
1159 		while (true) {
1160 			if (j == ARRAY_SIZE(filter_name_map)) {
1161 				// Filter ID in filters[i].id isn't supported.
1162 				str_free(&dest, allocator);
1163 				return LZMA_OPTIONS_ERROR;
1164 			}
1165 
1166 			if (filter_name_map[j].id == filters[i].id) {
1167 				// Add the filter name.
1168 				str_append_str(&dest, filter_name_map[j].name);
1169 
1170 				// If only the filter names were wanted then
1171 				// skip to the next filter. In this case
1172 				// .options is ignored and may be NULL even
1173 				// when the filter doesn't allow NULL options.
1174 				if (!show_opts)
1175 					break;
1176 
1177 				if (filters[i].options == NULL) {
1178 					if (!filter_name_map[j].allow_null) {
1179 						// Filter-specific options
1180 						// are missing but with
1181 						// this filter the options
1182 						// structure is mandatory.
1183 						str_free(&dest, allocator);
1184 						return LZMA_OPTIONS_ERROR;
1185 					}
1186 
1187 					// .options is allowed to be NULL.
1188 					// There is no need to add any
1189 					// options to the string.
1190 					break;
1191 				}
1192 
1193 				// Options structure is available. Add
1194 				// the filter options to the string.
1195 				const size_t optmap_count
1196 					= (flags & LZMA_STR_ENCODER)
1197 					? filter_name_map[j].strfy_encoder
1198 					: filter_name_map[j].strfy_decoder;
1199 				strfy_filter(&dest, opt_delim,
1200 						filter_name_map[j].optmap,
1201 						optmap_count,
1202 						filters[i].options);
1203 				break;
1204 			}
1205 
1206 			++j;
1207 		}
1208 	}
1209 
1210 	return str_finish(output_str, &dest, allocator);
1211 }
1212 
1213 
1214 extern LZMA_API(lzma_ret)
1215 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
1216 		const lzma_allocator *allocator)
1217 {
1218 	// On error *output_str is always set to NULL.
1219 	// Do it as the very first step.
1220 	if (output_str == NULL)
1221 		return LZMA_PROG_ERROR;
1222 
1223 	*output_str = NULL;
1224 
1225 	// Validate the flags.
1226 	const uint32_t supported_flags
1227 			= LZMA_STR_ALL_FILTERS
1228 			| LZMA_STR_ENCODER
1229 			| LZMA_STR_DECODER
1230 			| LZMA_STR_GETOPT_LONG;
1231 
1232 	if (flags & ~supported_flags)
1233 		return LZMA_OPTIONS_ERROR;
1234 
1235 	// Allocate memory for the output string.
1236 	lzma_str dest;
1237 	return_if_error(str_init(&dest, allocator));
1238 
1239 	// If only listing the filter names then separate them with spaces.
1240 	// Otherwise use newlines.
1241 	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1242 	const char *filter_delim = show_opts ? "\n" : " ";
1243 
1244 	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1245 	bool first_filter_printed = false;
1246 
1247 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
1248 		// If we are printing only one filter then skip others.
1249 		if (filter_id != LZMA_VLI_UNKNOWN
1250 				&& filter_id != filter_name_map[i].id)
1251 			continue;
1252 
1253 		// If we are printing only .xz filters then skip the others.
1254 		if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
1255 				&& (flags & LZMA_STR_ALL_FILTERS) == 0
1256 				&& filter_id == LZMA_VLI_UNKNOWN)
1257 			continue;
1258 
1259 		// Add a new line if this isn't the first filter being
1260 		// written to the string.
1261 		if (first_filter_printed)
1262 			str_append_str(&dest, filter_delim);
1263 
1264 		first_filter_printed = true;
1265 
1266 		if (flags & LZMA_STR_GETOPT_LONG)
1267 			str_append_str(&dest, "--");
1268 
1269 		str_append_str(&dest, filter_name_map[i].name);
1270 
1271 		// If only the filter names were wanted then continue
1272 		// to the next filter.
1273 		if (!show_opts)
1274 			continue;
1275 
1276 		const option_map *optmap = filter_name_map[i].optmap;
1277 		const char *d = opt_delim;
1278 
1279 		const size_t end = (flags & LZMA_STR_ENCODER)
1280 				? filter_name_map[i].strfy_encoder
1281 				: filter_name_map[i].strfy_decoder;
1282 
1283 		for (size_t j = 0; j < end; ++j) {
1284 			// The first option is delimited from the filter
1285 			// name using "=" or ":" and the rest of the options
1286 			// are separated with ",".
1287 			str_append_str(&dest, d);
1288 			d = ",";
1289 
1290 			// optname=<possible_values>
1291 			str_append_str(&dest, optmap[j].name);
1292 			str_append_str(&dest, "=<");
1293 
1294 			if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
1295 				// LZMA1/2 preset has its custom help string.
1296 				str_append_str(&dest, LZMA12_PRESET_STR);
1297 			} else if (optmap[j].flags
1298 					& OPTMAP_USE_NAME_VALUE_MAP) {
1299 				// Separate the possible option values by "|".
1300 				const name_value_map *m = optmap[j].u.map;
1301 				for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
1302 					if (k > 0)
1303 						str_append_str(&dest, "|");
1304 
1305 					str_append_str(&dest, m[k].name);
1306 				}
1307 			} else {
1308 				// Integer range is shown as min-max.
1309 				const bool use_byte_suffix = optmap[j].flags
1310 						& OPTMAP_USE_BYTE_SUFFIX;
1311 				str_append_u32(&dest, optmap[j].u.range.min,
1312 						use_byte_suffix);
1313 				str_append_str(&dest, "-");
1314 				str_append_u32(&dest, optmap[j].u.range.max,
1315 						use_byte_suffix);
1316 			}
1317 
1318 			str_append_str(&dest, ">");
1319 		}
1320 	}
1321 
1322 	// If no filters were added to the string then it must be because
1323 	// the caller provided an unsupported Filter ID.
1324 	if (!first_filter_printed) {
1325 		str_free(&dest, allocator);
1326 		return LZMA_OPTIONS_ERROR;
1327 	}
1328 
1329 	return str_finish(output_str, &dest, allocator);
1330 }
1331