xref: /freebsd/contrib/xz/src/liblzma/common/string_conversion.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       string_conversion.c
4 /// \brief      Conversion of strings to filter chain and vice versa
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "filter_common.h"
14 
15 
16 /////////////////////
17 // String building //
18 /////////////////////
19 
/// Number of bytes allocated for the buffer behind every lzma_str.
///
/// The buffer is never grown with realloc, so this constant has to be
/// large enough for the longest string that can be produced. A runtime
/// overflow check still exists as a safety net.
///
/// FIXME? A fixed size wastes memory if the application keeps the string
/// around for a long time. This can be improved later if it matters.
#define STR_ALLOC_SIZE 800
27 
28 
/// State of the string builder used when stringifying filters.
typedef struct {
	/// Start of the output buffer.
	char *buf;

	/// Number of bytes written to buf so far; the next byte
	/// is stored at buf[pos].
	size_t pos;
} lzma_str;
33 
34 
35 static lzma_ret
36 str_init(lzma_str *str, const lzma_allocator *allocator)
37 {
38 	str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
39 	if (str->buf == NULL)
40 		return LZMA_MEM_ERROR;
41 
42 	str->pos = 0;
43 	return LZMA_OK;
44 }
45 
46 
47 static void
48 str_free(lzma_str *str, const lzma_allocator *allocator)
49 {
50 	lzma_free(str->buf, allocator);
51 	return;
52 }
53 
54 
55 static bool
56 str_is_full(const lzma_str *str)
57 {
58 	return str->pos == STR_ALLOC_SIZE - 1;
59 }
60 
61 
62 static lzma_ret
63 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
64 {
65 	if (str_is_full(str)) {
66 		// The preallocated buffer was too small.
67 		// This shouldn't happen as STR_ALLOC_SIZE should
68 		// be adjusted if new filters are added.
69 		lzma_free(str->buf, allocator);
70 		*dest = NULL;
71 		assert(0);
72 		return LZMA_PROG_ERROR;
73 	}
74 
75 	str->buf[str->pos] = '\0';
76 	*dest = str->buf;
77 	return LZMA_OK;
78 }
79 
80 
81 static void
82 str_append_str(lzma_str *str, const char *s)
83 {
84 	const size_t len = strlen(s);
85 	const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
86 	const size_t copy_size = my_min(len, limit);
87 
88 	memcpy(str->buf + str->pos, s, copy_size);
89 	str->pos += copy_size;
90 	return;
91 }
92 
93 
94 static void
95 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
96 {
97 	if (v == 0) {
98 		str_append_str(str, "0");
99 	} else {
100 		// NOTE: Don't use plain "B" because xz and the parser in this
101 		// file don't support it and at glance it may look like 8
102 		// (there cannot be a space before the suffix).
103 		static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
104 
105 		size_t suf = 0;
106 		if (use_byte_suffix) {
107 			while ((v & 1023) == 0
108 					&& suf < ARRAY_SIZE(suffixes) - 1) {
109 				v >>= 10;
110 				++suf;
111 			}
112 		}
113 
114 		// UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
115 		// that initializing to "" initializes all elements to
116 		// zero so '\0'-termination gets handled by this.
117 		char buf[16] = "";
118 		size_t pos = sizeof(buf) - 1;
119 
120 		do {
121 			buf[--pos] = '0' + (v % 10);
122 			v /= 10;
123 		} while (v != 0);
124 
125 		str_append_str(str, buf + pos);
126 		str_append_str(str, suffixes[suf]);
127 	}
128 
129 	return;
130 }
131 
132 
133 //////////////////////////////////////////////
134 // Parsing and stringification declarations //
135 //////////////////////////////////////////////
136 
/// Longest allowed filter or option name, not counting the terminator.
/// In name_value_map this gives 11 chars + '\0' + sizeof(uint32_t)
/// = 16 bytes per element.
#define NAME_LEN_MAX 11


/// option_map.flags bit: convert the input value to an integer by looking
/// it up from .u.map. When this bit is not set, .u.range.{min,max} define
/// the allowed range of the integer instead.
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// option_map.flags bit: accept KiB/MiB/GiB suffixes when parsing and emit
/// them when stringifying a value that is an exact multiple of them.
/// Used e.g. for the LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// option_map.flags bit: leave the option out of the stringified output
/// when its integer value is zero. Used e.g. for the BCJ filter start
/// offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04
156 
/// Values for option_map.type. OPTMAP_TYPE_UINT32 is deliberately zero:
/// initializers that omit .type get it implicitly.
enum {
	OPTMAP_TYPE_UINT32 = 0,
	OPTMAP_TYPE_LZMA_MODE = 1,
	OPTMAP_TYPE_LZMA_MATCH_FINDER = 2,
	OPTMAP_TYPE_LZMA_PRESET = 3,
};
166 
167 
168 /// This is for mapping string values in options to integers.
169 /// The last element of an array must have "" as the name.
170 /// It's used e.g. for match finder names in LZMA1/2.
171 typedef struct {
172 	const char name[NAME_LEN_MAX + 1];
173 	const uint32_t value;
174 } name_value_map;
175 
176 
177 /// Each filter that has options needs an array of option_map structures.
178 /// The array doesn't need to be terminated as the functions take the
179 /// length of the array as an argument.
180 ///
181 /// When converting a string to filter options structure, option values
182 /// will be handled in a few different ways:
183 ///
184 /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
185 ///     is handled specially.
186 ///
187 /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
188 ///     converted to an integer using the name_value_map pointed by .u.map.
189 ///     The last element in .u.map must have .name = "" as the terminator.
190 ///
191 /// (3) Otherwise the string is treated as a non-negative unsigned decimal
192 ///     integer which must be in the range set in .u.range. If .flags has
193 ///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
194 ///
195 /// The integer value from (2) or (3) is then stored to filter_options
196 /// at the offset specified in .offset using the type specified in .type
197 /// (default is uint32_t).
198 ///
199 /// Stringifying a filter is done by processing a given number of options
200 /// in order from the beginning of an option_map array. The integer is
201 /// read from filter_options at .offset using the type from .type.
202 ///
203 /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
204 /// option is skipped.
205 ///
206 /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
207 /// to convert the option to a string. If the map doesn't contain a string
208 /// for the integer value then "UNKNOWN" is used.
209 ///
210 /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
211 /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
212 /// MiB, or GiB suffix is used if the value is an exact multiple of these.
213 /// Plain "B" suffix is never used.
214 typedef struct {
215 	char name[NAME_LEN_MAX + 1];
216 	uint8_t type;
217 	uint8_t flags;
218 	uint16_t offset;
219 
220 	union {
221 		struct {
222 			uint32_t min;
223 			uint32_t max;
224 		} range;
225 
226 		const name_value_map *map;
227 	} u;
228 } option_map;
229 
230 
231 static const char *parse_options(const char **const str, const char *str_end,
232 		void *filter_options,
233 		const option_map *const optmap, const size_t optmap_size);
234 
235 
236 /////////
237 // BCJ //
238 /////////
239 
#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) \
		|| defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) \
		|| defined(HAVE_ENCODER_ARMTHUMB) \
		|| defined(HAVE_DECODER_ARMTHUMB) \
		|| defined(HAVE_ENCODER_ARM64) \
		|| defined(HAVE_DECODER_ARM64) \
		|| defined(HAVE_ENCODER_POWERPC) \
		|| defined(HAVE_DECODER_POWERPC) \
		|| defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) \
		|| defined(HAVE_ENCODER_SPARC) \
		|| defined(HAVE_DECODER_SPARC)
/// Options shared by all BCJ filters: only the start offset, which may
/// use byte suffixes and is omitted from stringified output when zero.
static const option_map bcj_optmap[] = {
	{
		.name = "start",
		.offset = offsetof(lzma_options_bcj, start_offset),
		.flags = OPTMAP_USE_BYTE_SUFFIX | OPTMAP_NO_STRFY_ZERO,
		.u = { .range = { .min = 0, .max = UINT32_MAX } },
	}
};


/// Parses the options of a BCJ filter. No defaults need to be set here:
/// the zero-filled filter_options allocation already holds the default.
static const char *
parse_bcj(const char **const str, const char *str_end, void *filter_options)
{
	const size_t count = ARRAY_SIZE(bcj_optmap);
	return parse_options(str, str_end, filter_options,
			bcj_optmap, count);
}
#endif
274 
275 
276 ///////////
277 // Delta //
278 ///////////
279 
#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
/// The Delta filter has a single option, the distance.
static const option_map delta_optmap[] = {
	{
		.name = "dist",
		.offset = offsetof(lzma_options_delta, dist),
		.u = {
			.range = {
				.min = LZMA_DELTA_DIST_MIN,
				.max = LZMA_DELTA_DIST_MAX,
			},
		},
	}
};


/// Sets the Delta defaults (byte-wise delta with the minimum distance)
/// and then applies the options given in the string.
static const char *
parse_delta(const char **const str, const char *str_end, void *filter_options)
{
	lzma_options_delta *const delta = filter_options;
	delta->dist = LZMA_DELTA_DIST_MIN;
	delta->type = LZMA_DELTA_TYPE_BYTE;

	const size_t count = ARRAY_SIZE(delta_optmap);
	return parse_options(str, str_end, filter_options,
			delta_optmap, count);
}
#endif
302 
303 
304 ///////////////////
305 // LZMA1 & LZMA2 //
306 ///////////////////
307 
/// Help text that describes the accepted preset syntax: a digit from
/// 0 to 9 optionally followed by the 'e' (extreme) flag.
#define LZMA12_PRESET_STR "0-9[e]"
310 
311 
312 static const char *
313 parse_lzma12_preset(const char **const str, const char *str_end,
314 		uint32_t *preset)
315 {
316 	assert(*str < str_end);
317 	*preset = (uint32_t)(**str - '0');
318 
319 	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
320 	while (++*str < str_end) {
321 		switch (**str) {
322 		case 'e':
323 			*preset |= LZMA_PRESET_EXTREME;
324 			break;
325 
326 		default:
327 			return "Unsupported preset flag";
328 		}
329 	}
330 
331 	return NULL;
332 }
333 
334 
335 static const char *
336 set_lzma12_preset(const char **const str, const char *str_end,
337 		void *filter_options)
338 {
339 	uint32_t preset;
340 	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
341 	if (errmsg != NULL)
342 		return errmsg;
343 
344 	lzma_options_lzma *opts = filter_options;
345 	if (lzma_lzma_preset(opts, preset))
346 		return "Unsupported preset";
347 
348 	return NULL;
349 }
350 
351 
352 static const name_value_map lzma12_mode_map[] = {
353 	{ "fast",   LZMA_MODE_FAST },
354 	{ "normal", LZMA_MODE_NORMAL },
355 	{ "",       0 }
356 };
357 
358 
359 static const name_value_map lzma12_mf_map[] = {
360 	{ "hc3", LZMA_MF_HC3 },
361 	{ "hc4", LZMA_MF_HC4 },
362 	{ "bt2", LZMA_MF_BT2 },
363 	{ "bt3", LZMA_MF_BT3 },
364 	{ "bt4", LZMA_MF_BT4 },
365 	{ "",    0 }
366 };
367 
368 
369 static const option_map lzma12_optmap[] = {
370 	{
371 		.name = "preset",
372 		.type = OPTMAP_TYPE_LZMA_PRESET,
373 	}, {
374 		.name = "dict",
375 		.flags = OPTMAP_USE_BYTE_SUFFIX,
376 		.offset = offsetof(lzma_options_lzma, dict_size),
377 		.u.range.min = LZMA_DICT_SIZE_MIN,
378 		// FIXME? The max is really max for encoding but decoding
379 		// would allow 4 GiB - 1 B.
380 		.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
381 	}, {
382 		.name = "lc",
383 		.offset = offsetof(lzma_options_lzma, lc),
384 		.u.range.min = LZMA_LCLP_MIN,
385 		.u.range.max = LZMA_LCLP_MAX,
386 	}, {
387 		.name = "lp",
388 		.offset = offsetof(lzma_options_lzma, lp),
389 		.u.range.min = LZMA_LCLP_MIN,
390 		.u.range.max = LZMA_LCLP_MAX,
391 	}, {
392 		.name = "pb",
393 		.offset = offsetof(lzma_options_lzma, pb),
394 		.u.range.min = LZMA_PB_MIN,
395 		.u.range.max = LZMA_PB_MAX,
396 	}, {
397 		.name = "mode",
398 		.type = OPTMAP_TYPE_LZMA_MODE,
399 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
400 		.offset = offsetof(lzma_options_lzma, mode),
401 		.u.map = lzma12_mode_map,
402 	}, {
403 		.name = "nice",
404 		.offset = offsetof(lzma_options_lzma, nice_len),
405 		.u.range.min = 2,
406 		.u.range.max = 273,
407 	}, {
408 		.name = "mf",
409 		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
410 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
411 		.offset = offsetof(lzma_options_lzma, mf),
412 		.u.map = lzma12_mf_map,
413 	}, {
414 		.name = "depth",
415 		.offset = offsetof(lzma_options_lzma, depth),
416 		.u.range.min = 0,
417 		.u.range.max = UINT32_MAX,
418 	}
419 };
420 
421 
422 static const char *
423 parse_lzma12(const char **const str, const char *str_end, void *filter_options)
424 {
425 	lzma_options_lzma *opts = filter_options;
426 
427 	// It cannot fail.
428 	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
429 	assert(!preset_ret);
430 	(void)preset_ret;
431 
432 	const char *errmsg = parse_options(str, str_end, filter_options,
433 			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
434 	if (errmsg != NULL)
435 		return errmsg;
436 
437 	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
438 		return "The sum of lc and lp must not exceed 4";
439 
440 	return NULL;
441 }
442 
443 
444 /////////////////////////////////////////
445 // Generic parsing and stringification //
446 /////////////////////////////////////////
447 
448 static const struct {
449 	/// Name of the filter
450 	char name[NAME_LEN_MAX + 1];
451 
452 	/// For lzma_str_to_filters:
453 	/// Size of the filter-specific options structure.
454 	uint32_t opts_size;
455 
456 	/// Filter ID
457 	lzma_vli id;
458 
459 	/// For lzma_str_to_filters:
460 	/// Function to parse the filter-specific options. The filter_options
461 	/// will already have been allocated using lzma_alloc_zero().
462 	const char *(*parse)(const char **str, const char *str_end,
463 			void *filter_options);
464 
465 	/// For lzma_str_from_filters:
466 	/// If the flag LZMA_STR_ENCODER is used then the first
467 	/// strfy_encoder elements of optmap are stringified.
468 	/// With LZMA_STR_DECODER strfy_decoder is used.
469 	/// Currently encoders use all options that decoders do but if
470 	/// that changes then this needs to be changed too, for example,
471 	/// add a new OPTMAP flag to skip printing some decoder-only options.
472 	const option_map *optmap;
473 	uint8_t strfy_encoder;
474 	uint8_t strfy_decoder;
475 
476 	/// For lzma_str_from_filters:
477 	/// If true, lzma_filter.options is allowed to be NULL. In that case,
478 	/// only the filter name is printed without any options.
479 	bool allow_null;
480 
481 } filter_name_map[] = {
482 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
483 	{ "lzma1",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA1,
484 	  &parse_lzma12,  lzma12_optmap, 9, 5, false },
485 #endif
486 
487 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
488 	{ "lzma2",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA2,
489 	  &parse_lzma12,  lzma12_optmap, 9, 2, false },
490 #endif
491 
492 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
493 	{ "x86",          sizeof(lzma_options_bcj),   LZMA_FILTER_X86,
494 	  &parse_bcj,     bcj_optmap, 1, 1, true },
495 #endif
496 
497 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
498 	{ "arm",          sizeof(lzma_options_bcj),   LZMA_FILTER_ARM,
499 	  &parse_bcj,     bcj_optmap, 1, 1, true },
500 #endif
501 
502 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
503 	{ "armthumb",     sizeof(lzma_options_bcj),   LZMA_FILTER_ARMTHUMB,
504 	  &parse_bcj,     bcj_optmap, 1, 1, true },
505 #endif
506 
507 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
508 	{ "arm64",        sizeof(lzma_options_bcj),   LZMA_FILTER_ARM64,
509 	  &parse_bcj,     bcj_optmap, 1, 1, true },
510 #endif
511 
512 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
513 	{ "powerpc",      sizeof(lzma_options_bcj),   LZMA_FILTER_POWERPC,
514 	  &parse_bcj,     bcj_optmap, 1, 1, true },
515 #endif
516 
517 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
518 	{ "ia64",         sizeof(lzma_options_bcj),   LZMA_FILTER_IA64,
519 	  &parse_bcj,     bcj_optmap, 1, 1, true },
520 #endif
521 
522 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
523 	{ "sparc",        sizeof(lzma_options_bcj),   LZMA_FILTER_SPARC,
524 	  &parse_bcj,     bcj_optmap, 1, 1, true },
525 #endif
526 
527 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
528 	{ "delta",        sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
529 	  &parse_delta,   delta_optmap, 1, 1, false },
530 #endif
531 };
532 
533 
534 /// Decodes options from a string for one filter (name1=value1,name2=value2).
535 /// Caller must have allocated memory for filter_options already and set
536 /// the initial default values. This is called from the filter-specific
537 /// parse_* functions.
538 ///
539 /// The input string starts at *str and the address in str_end is the first
540 /// char that is not part of the string anymore. So no '\0' terminator is
541 /// used. *str is advanced every time something has been decoded successfully.
542 static const char *
543 parse_options(const char **const str, const char *str_end,
544 		void *filter_options,
545 		const option_map *const optmap, const size_t optmap_size)
546 {
547 	while (*str < str_end && **str != '\0') {
548 		// Each option is of the form name=value.
549 		// Commas (',') separate options. Extra commas are ignored.
550 		// Ignoring extra commas makes it simpler if an optional
551 		// option stored in a shell variable which can be empty.
552 		if (**str == ',') {
553 			++*str;
554 			continue;
555 		}
556 
557 		// Find where the next name=value ends.
558 		const size_t str_len = (size_t)(str_end - *str);
559 		const char *name_eq_value_end = memchr(*str, ',', str_len);
560 		if (name_eq_value_end == NULL)
561 			name_eq_value_end = str_end;
562 
563 		const char *equals_sign = memchr(*str, '=',
564 				(size_t)(name_eq_value_end - *str));
565 
566 		// Fail if the '=' wasn't found or the option name is missing
567 		// (the first char is '=').
568 		if (equals_sign == NULL || **str == '=')
569 			return "Options must be 'name=value' pairs separated "
570 					"with commas";
571 
572 		// Reject a too long option name so that the memcmp()
573 		// in the loop below won't read past the end of the
574 		// string in optmap[i].name.
575 		const size_t name_len = (size_t)(equals_sign - *str);
576 		if (name_len > NAME_LEN_MAX)
577 			return "Unknown option name";
578 
579 		// Find the option name from optmap[].
580 		size_t i = 0;
581 		while (true) {
582 			if (i == optmap_size)
583 				return "Unknown option name";
584 
585 			if (memcmp(*str, optmap[i].name, name_len) == 0
586 					&& optmap[i].name[name_len] == '\0')
587 				break;
588 
589 			++i;
590 		}
591 
592 		// The input string is good at least until the start of
593 		// the option value.
594 		*str = equals_sign + 1;
595 
596 		// The code assumes that the option value isn't an empty
597 		// string so check it here.
598 		const size_t value_len = (size_t)(name_eq_value_end - *str);
599 		if (value_len == 0)
600 			return "Option value cannot be empty";
601 
602 		// LZMA1/2 preset has its own parsing function.
603 		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
604 			const char *errmsg = set_lzma12_preset(str,
605 					name_eq_value_end, filter_options);
606 			if (errmsg != NULL)
607 				return errmsg;
608 
609 			continue;
610 		}
611 
612 		// It's an integer value.
613 		uint32_t v;
614 		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
615 			// The integer is picked from a string-to-integer map.
616 			//
617 			// Reject a too long value string so that the memcmp()
618 			// in the loop below won't read past the end of the
619 			// string in optmap[i].u.map[j].name.
620 			if (value_len > NAME_LEN_MAX)
621 				return "Invalid option value";
622 
623 			const name_value_map *map = optmap[i].u.map;
624 			size_t j = 0;
625 			while (true) {
626 				// The array is terminated with an empty name.
627 				if (map[j].name[0] == '\0')
628 					return "Invalid option value";
629 
630 				if (memcmp(*str, map[j].name, value_len) == 0
631 						&& map[j].name[value_len]
632 							== '\0') {
633 					v = map[j].value;
634 					break;
635 				}
636 
637 				++j;
638 			}
639 		} else if (**str < '0' || **str > '9') {
640 			// Note that "max" isn't supported while it is
641 			// supported in xz. It's not useful here.
642 			return "Value is not a non-negative decimal integer";
643 		} else {
644 			// strtoul() has locale-specific behavior so it cannot
645 			// be relied on to get reproducible results since we
646 			// cannot change the locate in a thread-safe library.
647 			// It also needs '\0'-termination.
648 			//
649 			// Use a temporary pointer so that *str will point
650 			// to the beginning of the value string in case
651 			// an error occurs.
652 			const char *p = *str;
653 			v = 0;
654 			do {
655 				if (v > UINT32_MAX / 10)
656 					return "Value out of range";
657 
658 				v *= 10;
659 
660 				const uint32_t add = (uint32_t)(*p - '0');
661 				if (UINT32_MAX - add < v)
662 					return "Value out of range";
663 
664 				v += add;
665 				++p;
666 			} while (p < name_eq_value_end
667 					&& *p >= '0' && *p <= '9');
668 
669 			if (p < name_eq_value_end) {
670 				// Remember this position so that it can be
671 				// used for error messages that are
672 				// specifically about the suffix. (Out of
673 				// range values are about the whole value
674 				// and those error messages point to the
675 				// beginning of the number part,
676 				// not to the suffix.)
677 				const char *multiplier_start = p;
678 
679 				// If multiplier suffix shouldn't be used
680 				// then don't allow them even if the value
681 				// would stay within limits. This is a somewhat
682 				// unnecessary check but it rejects silly
683 				// things like lzma2:pb=0MiB which xz allows.
684 				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
685 						== 0) {
686 					*str = multiplier_start;
687 					return "This option does not support "
688 						"any integer suffixes";
689 				}
690 
691 				uint32_t shift;
692 
693 				switch (*p) {
694 				case 'k':
695 				case 'K':
696 					shift = 10;
697 					break;
698 
699 				case 'm':
700 				case 'M':
701 					shift = 20;
702 					break;
703 
704 				case 'g':
705 				case 'G':
706 					shift = 30;
707 					break;
708 
709 				default:
710 					*str = multiplier_start;
711 					return "Invalid multiplier suffix "
712 							"(KiB, MiB, or GiB)";
713 				}
714 
715 				++p;
716 
717 				// Allow "M", "Mi", "MB", "MiB" and the same
718 				// for the other five characters from the
719 				// switch-statement above. All are handled
720 				// as base-2 (perhaps a mistake, perhaps not).
721 				// Note that 'i' and 'B' are case sensitive.
722 				if (p < name_eq_value_end && *p == 'i')
723 					++p;
724 
725 				if (p < name_eq_value_end && *p == 'B')
726 					++p;
727 
728 				// Now we must have no chars remaining.
729 				if (p < name_eq_value_end) {
730 					*str = multiplier_start;
731 					return "Invalid multiplier suffix "
732 							"(KiB, MiB, or GiB)";
733 				}
734 
735 				if (v > (UINT32_MAX >> shift))
736 					return "Value out of range";
737 
738 				v <<= shift;
739 			}
740 
741 			if (v < optmap[i].u.range.min
742 					|| v > optmap[i].u.range.max)
743 				return "Value out of range";
744 		}
745 
746 		// Set the value in filter_options. Enums are handled
747 		// specially since the underlying type isn't the same
748 		// as uint32_t on all systems.
749 		void *ptr = (char *)filter_options + optmap[i].offset;
750 		switch (optmap[i].type) {
751 		case OPTMAP_TYPE_LZMA_MODE:
752 			*(lzma_mode *)ptr = (lzma_mode)v;
753 			break;
754 
755 		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
756 			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
757 			break;
758 
759 		default:
760 			*(uint32_t *)ptr = v;
761 			break;
762 		}
763 
764 		// This option has been successfully handled.
765 		*str = name_eq_value_end;
766 	}
767 
768 	// No errors.
769 	return NULL;
770 }
771 
772 
773 /// Finds the name of the filter at the beginning of the string and
774 /// calls filter_name_map[i].parse() to decode the filter-specific options.
775 /// The caller must have set str_end so that exactly one filter and its
776 /// options are present without any trailing characters.
777 static const char *
778 parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
779 		const lzma_allocator *allocator, bool only_xz)
780 {
781 	// Search for a colon or equals sign that would separate the filter
782 	// name from filter options. If neither is found, then the input
783 	// string only contains a filter name and there are no options.
784 	//
785 	// First assume that a colon or equals sign won't be found:
786 	const char *name_end = str_end;
787 	const char *opts_start = str_end;
788 
789 	for (const char *p = *str; p < str_end; ++p) {
790 		if (*p == ':' || *p == '=') {
791 			name_end = p;
792 
793 			// Filter options (name1=value1,name2=value2,...)
794 			// begin after the colon or equals sign.
795 			opts_start = p + 1;
796 			break;
797 		}
798 	}
799 
800 	// Reject a too long filter name so that the memcmp()
801 	// in the loop below won't read past the end of the
802 	// string in filter_name_map[i].name.
803 	const size_t name_len = (size_t)(name_end - *str);
804 	if (name_len > NAME_LEN_MAX)
805 		return "Unknown filter name";
806 
807 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
808 		if (memcmp(*str, filter_name_map[i].name, name_len) == 0
809 				&& filter_name_map[i].name[name_len] == '\0') {
810 			if (only_xz && filter_name_map[i].id
811 					>= LZMA_FILTER_RESERVED_START)
812 				return "This filter cannot be used in "
813 						"the .xz format";
814 
815 			// Allocate the filter-specific options and
816 			// initialize the memory with zeros.
817 			void *options = lzma_alloc_zero(
818 					filter_name_map[i].opts_size,
819 					allocator);
820 			if (options == NULL)
821 				return "Memory allocation failed";
822 
823 			// Filter name was found so the input string is good
824 			// at least this far.
825 			*str = opts_start;
826 
827 			const char *errmsg = filter_name_map[i].parse(
828 					str, str_end, options);
829 			if (errmsg != NULL) {
830 				lzma_free(options, allocator);
831 				return errmsg;
832 			}
833 
834 			// *filter is modified only when parsing is successful.
835 			filter->id = filter_name_map[i].id;
836 			filter->options = options;
837 			return NULL;
838 		}
839 	}
840 
841 	return "Unknown filter name";
842 }
843 
844 
845 /// Converts the string to a filter chain (array of lzma_filter structures).
846 ///
847 /// *str is advanced every time something has been decoded successfully.
848 /// This way the caller knows where in the string a possible error occurred.
849 static const char *
850 str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
851 		const lzma_allocator *allocator)
852 {
853 	const char *errmsg;
854 
855 	// Skip leading spaces.
856 	while (**str == ' ')
857 		++*str;
858 
859 	if (**str == '\0')
860 		return "Empty string is not allowed, "
861 				"try \"6\" if a default value is needed";
862 
863 	// Detect the type of the string.
864 	//
865 	// A string beginning with a digit or a string beginning with
866 	// one dash and a digit are treated as presets. Trailing spaces
867 	// will be ignored too (leading spaces were already ignored above).
868 	//
869 	// For example, "6", "7  ", "-9e", or "  -3  " are treated as presets.
870 	// Strings like "-" or "- " aren't preset.
871 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
872 	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
873 		if (**str == '-')
874 			++*str;
875 
876 		// Ignore trailing spaces.
877 		const size_t str_len = strlen(*str);
878 		const char *str_end = memchr(*str, ' ', str_len);
879 		if (str_end != NULL) {
880 			// There is at least one trailing space. Check that
881 			// there are no chars other than spaces.
882 			for (size_t i = 1; str_end[i] != '\0'; ++i)
883 				if (str_end[i] != ' ')
884 					return "Unsupported preset";
885 		} else {
886 			// There are no trailing spaces. Use the whole string.
887 			str_end = *str + str_len;
888 		}
889 
890 		uint32_t preset;
891 		errmsg = parse_lzma12_preset(str, str_end, &preset);
892 		if (errmsg != NULL)
893 			return errmsg;
894 
895 		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
896 		if (opts == NULL)
897 			return "Memory allocation failed";
898 
899 		if (lzma_lzma_preset(opts, preset)) {
900 			lzma_free(opts, allocator);
901 			return "Unsupported preset";
902 		}
903 
904 		filters[0].id = LZMA_FILTER_LZMA2;
905 		filters[0].options = opts;
906 		filters[1].id = LZMA_VLI_UNKNOWN;
907 		filters[1].options = NULL;
908 
909 		return NULL;
910 	}
911 
912 	// Not a preset so it must be a filter chain.
913 	//
914 	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
915 	// can be used in .xz.
916 	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
917 
918 	// Use a temporary array so that we don't modify the caller-supplied
919 	// one until we know that no errors occurred.
920 	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
921 
922 	size_t i = 0;
923 	do {
924 		if (i == LZMA_FILTERS_MAX) {
925 			errmsg = "The maximum number of filters is four";
926 			goto error;
927 		}
928 
929 		// Skip "--" if present.
930 		if ((*str)[0] == '-' && (*str)[1] == '-')
931 			*str += 2;
932 
933 		// Locate the end of "filter:name1=value1,name2=value2",
934 		// stopping at the first "--" or a single space.
935 		const char *filter_end = *str;
936 		while (filter_end[0] != '\0') {
937 			if ((filter_end[0] == '-' && filter_end[1] == '-')
938 					|| filter_end[0] == ' ')
939 				break;
940 
941 			++filter_end;
942 		}
943 
944 		// Inputs that have "--" at the end or "-- " in the middle
945 		// will result in an empty filter name.
946 		if (filter_end == *str) {
947 			errmsg = "Filter name is missing";
948 			goto error;
949 		}
950 
951 		errmsg = parse_filter(str, filter_end, &temp_filters[i],
952 				allocator, only_xz);
953 		if (errmsg != NULL)
954 			goto error;
955 
956 		// Skip trailing spaces.
957 		while (**str == ' ')
958 			++*str;
959 
960 		++i;
961 	} while (**str != '\0');
962 
963 	// Seems to be good, terminate the array so that
964 	// basic validation can be done.
965 	temp_filters[i].id = LZMA_VLI_UNKNOWN;
966 	temp_filters[i].options = NULL;
967 
968 	// Do basic validation if the application didn't prohibit it.
969 	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
970 		size_t dummy;
971 		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
972 		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
973 		if (ret != LZMA_OK) {
974 			errmsg = "Invalid filter chain "
975 					"('lzma2' missing at the end?)";
976 			goto error;
977 		}
978 	}
979 
980 	// All good. Copy the filters to the application supplied array.
981 	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
982 	return NULL;
983 
984 error:
985 	// Free the filter options that were successfully decoded.
986 	while (i-- > 0)
987 		lzma_free(temp_filters[i].options, allocator);
988 
989 	return errmsg;
990 }
991 
992 
993 extern LZMA_API(const char *)
994 lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
995 		uint32_t flags, const lzma_allocator *allocator)
996 {
997 	if (str == NULL || filters == NULL)
998 		return "Unexpected NULL pointer argument(s) "
999 				"to lzma_str_to_filters()";
1000 
1001 	// Validate the flags.
1002 	const uint32_t supported_flags
1003 			= LZMA_STR_ALL_FILTERS
1004 			| LZMA_STR_NO_VALIDATION;
1005 
1006 	if (flags & ~supported_flags)
1007 		return "Unsupported flags to lzma_str_to_filters()";
1008 
1009 	const char *used = str;
1010 	const char *errmsg = str_to_filters(&used, filters, flags, allocator);
1011 
1012 	if (error_pos != NULL) {
1013 		const size_t n = (size_t)(used - str);
1014 		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
1015 	}
1016 
1017 	return errmsg;
1018 }
1019 
1020 
1021 /// Converts options of one filter to a string.
1022 ///
1023 /// The caller must have already put the filter name in the destination
1024 /// string. Since it is possible that no options will be needed, the caller
1025 /// won't have put a delimiter character (':' or '=') in the string yet.
1026 /// We will add it if at least one option will be added to the string.
1027 static void
1028 strfy_filter(lzma_str *dest, const char *delimiter,
1029 		const option_map *optmap, size_t optmap_count,
1030 		const void *filter_options)
1031 {
1032 	for (size_t i = 0; i < optmap_count; ++i) {
1033 		// No attempt is made to reverse LZMA1/2 preset.
1034 		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
1035 			continue;
1036 
1037 		// All options have integer values, some just are mapped
1038 		// to a string with a name_value_map. LZMA1/2 preset
1039 		// isn't reversed back to preset=PRESET form.
1040 		uint32_t v;
1041 		const void *ptr
1042 			= (const char *)filter_options + optmap[i].offset;
1043 		switch (optmap[i].type) {
1044 			case OPTMAP_TYPE_LZMA_MODE:
1045 				v = *(const lzma_mode *)ptr;
1046 				break;
1047 
1048 			case OPTMAP_TYPE_LZMA_MATCH_FINDER:
1049 				v = *(const lzma_match_finder *)ptr;
1050 				break;
1051 
1052 			default:
1053 				v = *(const uint32_t *)ptr;
1054 				break;
1055 		}
1056 
1057 		// Skip this if this option should be omitted from
1058 		// the string when the value is zero.
1059 		if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
1060 			continue;
1061 
1062 		// Before the first option we add whatever delimiter
1063 		// the caller gave us. For later options a comma is used.
1064 		str_append_str(dest, delimiter);
1065 		delimiter = ",";
1066 
1067 		// Add the option name and equals sign.
1068 		str_append_str(dest, optmap[i].name);
1069 		str_append_str(dest, "=");
1070 
1071 		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
1072 			const name_value_map *map = optmap[i].u.map;
1073 			size_t j = 0;
1074 			while (true) {
1075 				if (map[j].name[0] == '\0') {
1076 					str_append_str(dest, "UNKNOWN");
1077 					break;
1078 				}
1079 
1080 				if (map[j].value == v) {
1081 					str_append_str(dest, map[j].name);
1082 					break;
1083 				}
1084 
1085 				++j;
1086 			}
1087 		} else {
1088 			str_append_u32(dest, v,
1089 				optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
1090 		}
1091 	}
1092 
1093 	return;
1094 }
1095 
1096 
1097 extern LZMA_API(lzma_ret)
1098 lzma_str_from_filters(char **output_str, const lzma_filter *filters,
1099 		uint32_t flags, const lzma_allocator *allocator)
1100 {
1101 	// On error *output_str is always set to NULL.
1102 	// Do it as the very first step.
1103 	if (output_str == NULL)
1104 		return LZMA_PROG_ERROR;
1105 
1106 	*output_str = NULL;
1107 
1108 	if (filters == NULL)
1109 		return LZMA_PROG_ERROR;
1110 
1111 	// Validate the flags.
1112 	const uint32_t supported_flags
1113 			= LZMA_STR_ENCODER
1114 			| LZMA_STR_DECODER
1115 			| LZMA_STR_GETOPT_LONG
1116 			| LZMA_STR_NO_SPACES;
1117 
1118 	if (flags & ~supported_flags)
1119 		return LZMA_OPTIONS_ERROR;
1120 
1121 	// There must be at least one filter.
1122 	if (filters[0].id == LZMA_VLI_UNKNOWN)
1123 		return LZMA_OPTIONS_ERROR;
1124 
1125 	// Allocate memory for the output string.
1126 	lzma_str dest;
1127 	return_if_error(str_init(&dest, allocator));
1128 
1129 	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1130 
1131 	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1132 
1133 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
1134 		// If we reach LZMA_FILTERS_MAX, then the filters array
1135 		// is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
1136 		if (i == LZMA_FILTERS_MAX) {
1137 			str_free(&dest, allocator);
1138 			return LZMA_OPTIONS_ERROR;
1139 		}
1140 
1141 		// Don't add a space between filters if the caller
1142 		// doesn't want them.
1143 		if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
1144 			str_append_str(&dest, " ");
1145 
1146 		// Use dashes for xz getopt_long() compatible syntax but also
1147 		// use dashes to separate filters when spaces weren't wanted.
1148 		if ((flags & LZMA_STR_GETOPT_LONG)
1149 				|| (i > 0 && (flags & LZMA_STR_NO_SPACES)))
1150 			str_append_str(&dest, "--");
1151 
1152 		size_t j = 0;
1153 		while (true) {
1154 			if (j == ARRAY_SIZE(filter_name_map)) {
1155 				// Filter ID in filters[i].id isn't supported.
1156 				str_free(&dest, allocator);
1157 				return LZMA_OPTIONS_ERROR;
1158 			}
1159 
1160 			if (filter_name_map[j].id == filters[i].id) {
1161 				// Add the filter name.
1162 				str_append_str(&dest, filter_name_map[j].name);
1163 
1164 				// If only the filter names were wanted then
1165 				// skip to the next filter. In this case
1166 				// .options is ignored and may be NULL even
1167 				// when the filter doesn't allow NULL options.
1168 				if (!show_opts)
1169 					break;
1170 
1171 				if (filters[i].options == NULL) {
1172 					if (!filter_name_map[j].allow_null) {
1173 						// Filter-specific options
1174 						// are missing but with
1175 						// this filter the options
1176 						// structure is mandatory.
1177 						str_free(&dest, allocator);
1178 						return LZMA_OPTIONS_ERROR;
1179 					}
1180 
1181 					// .options is allowed to be NULL.
1182 					// There is no need to add any
1183 					// options to the string.
1184 					break;
1185 				}
1186 
1187 				// Options structure is available. Add
1188 				// the filter options to the string.
1189 				const size_t optmap_count
1190 					= (flags & LZMA_STR_ENCODER)
1191 					? filter_name_map[j].strfy_encoder
1192 					: filter_name_map[j].strfy_decoder;
1193 				strfy_filter(&dest, opt_delim,
1194 						filter_name_map[j].optmap,
1195 						optmap_count,
1196 						filters[i].options);
1197 				break;
1198 			}
1199 
1200 			++j;
1201 		}
1202 	}
1203 
1204 	return str_finish(output_str, &dest, allocator);
1205 }
1206 
1207 
1208 extern LZMA_API(lzma_ret)
1209 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
1210 		const lzma_allocator *allocator)
1211 {
1212 	// On error *output_str is always set to NULL.
1213 	// Do it as the very first step.
1214 	if (output_str == NULL)
1215 		return LZMA_PROG_ERROR;
1216 
1217 	*output_str = NULL;
1218 
1219 	// Validate the flags.
1220 	const uint32_t supported_flags
1221 			= LZMA_STR_ALL_FILTERS
1222 			| LZMA_STR_ENCODER
1223 			| LZMA_STR_DECODER
1224 			| LZMA_STR_GETOPT_LONG;
1225 
1226 	if (flags & ~supported_flags)
1227 		return LZMA_OPTIONS_ERROR;
1228 
1229 	// Allocate memory for the output string.
1230 	lzma_str dest;
1231 	return_if_error(str_init(&dest, allocator));
1232 
1233 	// If only listing the filter names then separate them with spaces.
1234 	// Otherwise use newlines.
1235 	const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1236 	const char *filter_delim = show_opts ? "\n" : " ";
1237 
1238 	const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1239 	bool first_filter_printed = false;
1240 
1241 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
1242 		// If we are printing only one filter then skip others.
1243 		if (filter_id != LZMA_VLI_UNKNOWN
1244 				&& filter_id != filter_name_map[i].id)
1245 			continue;
1246 
1247 		// If we are printing only .xz filters then skip the others.
1248 		if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
1249 				&& (flags & LZMA_STR_ALL_FILTERS) == 0
1250 				&& filter_id == LZMA_VLI_UNKNOWN)
1251 			continue;
1252 
1253 		// Add a new line if this isn't the first filter being
1254 		// written to the string.
1255 		if (first_filter_printed)
1256 			str_append_str(&dest, filter_delim);
1257 
1258 		first_filter_printed = true;
1259 
1260 		if (flags & LZMA_STR_GETOPT_LONG)
1261 			str_append_str(&dest, "--");
1262 
1263 		str_append_str(&dest, filter_name_map[i].name);
1264 
1265 		// If only the filter names were wanted then continue
1266 		// to the next filter.
1267 		if (!show_opts)
1268 			continue;
1269 
1270 		const option_map *optmap = filter_name_map[i].optmap;
1271 		const char *d = opt_delim;
1272 
1273 		const size_t end = (flags & LZMA_STR_ENCODER)
1274 				? filter_name_map[i].strfy_encoder
1275 				: filter_name_map[i].strfy_decoder;
1276 
1277 		for (size_t j = 0; j < end; ++j) {
1278 			// The first option is delimited from the filter
1279 			// name using "=" or ":" and the rest of the options
1280 			// are separated with ",".
1281 			str_append_str(&dest, d);
1282 			d = ",";
1283 
1284 			// optname=<possible_values>
1285 			str_append_str(&dest, optmap[j].name);
1286 			str_append_str(&dest, "=<");
1287 
1288 			if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
1289 				// LZMA1/2 preset has its custom help string.
1290 				str_append_str(&dest, LZMA12_PRESET_STR);
1291 			} else if (optmap[j].flags
1292 					& OPTMAP_USE_NAME_VALUE_MAP) {
1293 				// Separate the possible option values by "|".
1294 				const name_value_map *m = optmap[j].u.map;
1295 				for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
1296 					if (k > 0)
1297 						str_append_str(&dest, "|");
1298 
1299 					str_append_str(&dest, m[k].name);
1300 				}
1301 			} else {
1302 				// Integer range is shown as min-max.
1303 				const bool use_byte_suffix = optmap[j].flags
1304 						& OPTMAP_USE_BYTE_SUFFIX;
1305 				str_append_u32(&dest, optmap[j].u.range.min,
1306 						use_byte_suffix);
1307 				str_append_str(&dest, "-");
1308 				str_append_u32(&dest, optmap[j].u.range.max,
1309 						use_byte_suffix);
1310 			}
1311 
1312 			str_append_str(&dest, ">");
1313 		}
1314 	}
1315 
1316 	// If no filters were added to the string then it must be because
1317 	// the caller provided an unsupported Filter ID.
1318 	if (!first_filter_printed) {
1319 		str_free(&dest, allocator);
1320 		return LZMA_OPTIONS_ERROR;
1321 	}
1322 
1323 	return str_finish(output_str, &dest, allocator);
1324 }
1325