xref: /freebsd/contrib/xz/src/xz/args.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       args.c
6 /// \brief      Argument parsing
7 ///
8 /// \note       Filter-specific options parsing is in options.c.
9 //
10 //  Authors:    Lasse Collin
11 //              Jia Tan
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
15 #include "private.h"
16 
17 #include "getopt.h"
18 #include <ctype.h>
19 
20 
// Set by -c/--stdout. args_parse() also turns this on in test mode.
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep and --single-stream. args_parse() also turns this on
// when the output goes to stdout or when testing.
bool opt_keep_original = false;

// Cleared by --no-sync. args_parse() also clears this whenever
// opt_keep_original ends up being true.
bool opt_synchronous = true;

// Set by --robot.
bool opt_robot = false;

// Set by --ignore-check.
bool opt_ignore_check = false;

// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char stdin_filename[] = "(stdin)";
31 
32 
/// Parse a memory usage limit and pass it to hardware_memlimit_set().
///
/// \param      name              Option name used when the value is
///                               parsed as a plain number
/// \param      name_percentage   Option name used when the value is
///                               parsed as a percentage
/// \param      str               Option argument; a trailing '%' makes
///                               it a percentage in the range [1, 100]
/// \param      set_compress      Passed through to hardware_memlimit_set()
/// \param      set_decompress    Passed through to hardware_memlimit_set()
/// \param      set_mtdec         Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, const char *str,
		bool set_compress, bool set_decompress, bool set_mtdec)
{
	const size_t str_size = strlen(str);
	const bool has_percent_sign
			= str_size != 0 && str[str_size - 1] == '%';
	uint64_t limit;

	if (!has_percent_sign) {
		// UINT64_MAX is the upper bound even on 32-bit systems
		// (where SIZE_MAX would be the natural choice) so that
		// scripts passing values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// The percent sign has to go before the number can be
		// parsed. Strip it from a private copy: str may point
		// into argv[], and editing argv[] would change what
		// tools like "ps auxf" show (--memlimit=50% would turn
		// into --memlimit=50).
		char *digits = xstrdup(str);
		digits[str_size - 1] = '\0';
		limit = str_to_uint64(name_percentage, digits, 1, 100);
		free(digits);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress, set_mtdec,
			has_percent_sign);
}
67 
68 
69 static void
parse_block_list(const char * str_const)70 parse_block_list(const char *str_const)
71 {
72 	// We need a modifiable string in the for-loop.
73 	char *str_start = xstrdup(str_const);
74 	char *str = str_start;
75 
76 	// It must be non-empty and not begin with a comma.
77 	if (str[0] == '\0' || str[0] == ',')
78 		message_fatal(_("%s: Invalid argument to --block-list"), str);
79 
80 	// Count the number of comma-separated strings.
81 	size_t count = 1;
82 	for (size_t i = 0; str[i] != '\0'; ++i)
83 		if (str[i] == ',')
84 			++count;
85 
86 	// Prevent an unlikely integer overflow.
87 	if (count > SIZE_MAX / sizeof(block_list_entry) - 1)
88 		message_fatal(_("%s: Too many arguments to --block-list"),
89 				str);
90 
91 	// Allocate memory to hold all the sizes specified.
92 	// If --block-list was specified already, its value is forgotten.
93 	free(opt_block_list);
94 	opt_block_list = xmalloc((count + 1) * sizeof(block_list_entry));
95 
96 	// Clear the bitmask of filter chains in use.
97 	block_list_chain_mask = 0;
98 
99 	// Reset the largest Block size found in --block-list.
100 	block_list_largest = 0;
101 
102 	for (size_t i = 0; i < count; ++i) {
103 		// Locate the next comma and replace it with \0.
104 		char *p = strchr(str, ',');
105 		if (p != NULL)
106 			*p = '\0';
107 
108 		// Use the default filter chain unless overridden.
109 		opt_block_list[i].chain_num = 0;
110 
111 		// To specify a filter chain, the block list entry may be
112 		// prepended with "[filter-chain-number]:". The size is
113 		// still required for every block.
114 		// For instance:
115 		// --block-list=2:10MiB,1:5MiB,,8MiB,0:0
116 		//
117 		// Translates to:
118 		// 1. Block of 10 MiB using filter chain 2
119 		// 2. Block of 5 MiB using filter chain 1
120 		// 3. Block of 5 MiB using filter chain 1
121 		// 4. Block of 8 MiB using the default filter chain
122 		// 5. The last block uses the default filter chain
123 		//
124 		// The block list:
125 		// --block-list=2:MiB,1:,0
126 		//
127 		// Is not allowed because the second block does not specify
128 		// the block size, only the filter chain.
129 		if (str[0] >= '0' && str[0] <= '9' && str[1] == ':') {
130 			if (str[2] == '\0')
131 				message_fatal(_("In --block-list, block "
132 						"size is missing after "
133 						"filter chain number '%c:'"),
134 						str[0]);
135 
136 			const unsigned chain_num = (unsigned)(str[0] - '0');
137 			opt_block_list[i].chain_num = chain_num;
138 			block_list_chain_mask |= 1U << chain_num;
139 			str += 2;
140 		} else {
141 			// This Block uses the default filter chain.
142 			block_list_chain_mask |= 1U << 0;
143 		}
144 
145 		if (str[0] == '\0') {
146 			// There is no string, that is, a comma follows
147 			// another comma. Use the previous value.
148 			//
149 			// NOTE: We checked earlier that the first char
150 			// of the whole list cannot be a comma.
151 			assert(i > 0);
152 			opt_block_list[i] = opt_block_list[i - 1];
153 		} else {
154 			opt_block_list[i].size = str_to_uint64("block-list",
155 					str, 0, UINT64_MAX);
156 
157 			// Zero indicates no more new Blocks.
158 			if (opt_block_list[i].size == 0) {
159 				if (i + 1 != count)
160 					message_fatal(_("0 can only be used "
161 							"as the last element "
162 							"in --block-list"));
163 
164 				opt_block_list[i].size = UINT64_MAX;
165 			}
166 
167 			// Remember the largest Block size in the list.
168 			//
169 			// NOTE: Do this after handling the special value 0
170 			// because when 0 is used, we don't want to reduce
171 			// the Block size of the multithreaded encoder.
172 			if (block_list_largest < opt_block_list[i].size)
173 				block_list_largest = opt_block_list[i].size;
174 		}
175 
176 		// Be standards compliant: p + 1 is undefined behavior
177 		// if p == NULL. That occurs on the last iteration of
178 		// the loop when we won't care about the value of str
179 		// anymore anyway. That is, this is done conditionally
180 		// solely for standard conformance reasons.
181 		if (p != NULL)
182 			str = p + 1;
183 	}
184 
185 	// Terminate the array.
186 	opt_block_list[count].size = 0;
187 
188 	free(str_start);
189 	return;
190 }
191 
192 
193 static void
parse_real(args_info * args,int argc,char ** argv)194 parse_real(args_info *args, int argc, char **argv)
195 {
196 	enum {
197 		OPT_FILTERS = INT_MIN,
198 		OPT_FILTERS1,
199 		OPT_FILTERS2,
200 		OPT_FILTERS3,
201 		OPT_FILTERS4,
202 		OPT_FILTERS5,
203 		OPT_FILTERS6,
204 		OPT_FILTERS7,
205 		OPT_FILTERS8,
206 		OPT_FILTERS9,
207 		OPT_FILTERS_HELP,
208 
209 		OPT_X86,
210 		OPT_POWERPC,
211 		OPT_IA64,
212 		OPT_ARM,
213 		OPT_ARMTHUMB,
214 		OPT_ARM64,
215 		OPT_SPARC,
216 		OPT_RISCV,
217 		OPT_DELTA,
218 		OPT_LZMA1,
219 		OPT_LZMA2,
220 
221 		OPT_NO_SYNC,
222 		OPT_SINGLE_STREAM,
223 		OPT_NO_SPARSE,
224 		OPT_FILES,
225 		OPT_FILES0,
226 		OPT_BLOCK_SIZE,
227 		OPT_BLOCK_LIST,
228 		OPT_MEM_COMPRESS,
229 		OPT_MEM_DECOMPRESS,
230 		OPT_MEM_MT_DECOMPRESS,
231 		OPT_NO_ADJUST,
232 		OPT_INFO_MEMORY,
233 		OPT_ROBOT,
234 		OPT_FLUSH_TIMEOUT,
235 		OPT_IGNORE_CHECK,
236 	};
237 
238 	static const char short_opts[]
239 			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
240 
241 	static const struct option long_opts[] = {
242 		// Operation mode
243 		{ "compress",     no_argument,       NULL,  'z' },
244 		{ "decompress",   no_argument,       NULL,  'd' },
245 		{ "uncompress",   no_argument,       NULL,  'd' },
246 		{ "test",         no_argument,       NULL,  't' },
247 		{ "list",         no_argument,       NULL,  'l' },
248 
249 		// Operation modifiers
250 		{ "keep",         no_argument,       NULL,  'k' },
251 		{ "force",        no_argument,       NULL,  'f' },
252 		{ "stdout",       no_argument,       NULL,  'c' },
253 		{ "to-stdout",    no_argument,       NULL,  'c' },
254 		{ "no-sync",      no_argument,       NULL,  OPT_NO_SYNC },
255 		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
256 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
257 		{ "suffix",       required_argument, NULL,  'S' },
258 		{ "files",        optional_argument, NULL,  OPT_FILES },
259 		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
260 
261 		// Basic compression settings
262 		{ "format",       required_argument, NULL,  'F' },
263 		{ "check",        required_argument, NULL,  'C' },
264 		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
265 		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
266 		{ "block-list",   required_argument, NULL,  OPT_BLOCK_LIST },
267 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
268 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
269 		{ "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS },
270 		{ "memlimit",     required_argument, NULL,  'M' },
271 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
272 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
273 		{ "threads",      required_argument, NULL,  'T' },
274 		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
275 
276 		{ "extreme",      no_argument,       NULL,  'e' },
277 		{ "fast",         no_argument,       NULL,  '0' },
278 		{ "best",         no_argument,       NULL,  '9' },
279 
280 		// Filters
281 		{ "filters",      required_argument, NULL,  OPT_FILTERS},
282 		{ "filters1",     required_argument, NULL,  OPT_FILTERS1},
283 		{ "filters2",     required_argument, NULL,  OPT_FILTERS2},
284 		{ "filters3",     required_argument, NULL,  OPT_FILTERS3},
285 		{ "filters4",     required_argument, NULL,  OPT_FILTERS4},
286 		{ "filters5",     required_argument, NULL,  OPT_FILTERS5},
287 		{ "filters6",     required_argument, NULL,  OPT_FILTERS6},
288 		{ "filters7",     required_argument, NULL,  OPT_FILTERS7},
289 		{ "filters8",     required_argument, NULL,  OPT_FILTERS8},
290 		{ "filters9",     required_argument, NULL,  OPT_FILTERS9},
291 		{ "filters-help", no_argument,       NULL,  OPT_FILTERS_HELP},
292 
293 		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
294 		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
295 		{ "x86",          optional_argument, NULL,  OPT_X86 },
296 		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
297 		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
298 		{ "arm",          optional_argument, NULL,  OPT_ARM },
299 		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
300 		{ "arm64",        optional_argument, NULL,  OPT_ARM64 },
301 		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
302 		{ "riscv",        optional_argument, NULL,  OPT_RISCV },
303 		{ "delta",        optional_argument, NULL,  OPT_DELTA },
304 
305 		// Other options
306 		{ "quiet",        no_argument,       NULL,  'q' },
307 		{ "verbose",      no_argument,       NULL,  'v' },
308 		{ "no-warn",      no_argument,       NULL,  'Q' },
309 		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
310 		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
311 		{ "help",         no_argument,       NULL,  'h' },
312 		{ "long-help",    no_argument,       NULL,  'H' },
313 		{ "version",      no_argument,       NULL,  'V' },
314 
315 		{ NULL,           0,                 NULL,   0 }
316 	};
317 
318 	int c;
319 
320 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
321 			!= -1) {
322 		switch (c) {
323 		// Compression preset (also for decompression if --format=raw)
324 		case '0': case '1': case '2': case '3': case '4':
325 		case '5': case '6': case '7': case '8': case '9':
326 			coder_set_preset((uint32_t)(c - '0'));
327 			break;
328 
329 		// --memlimit-compress
330 		case OPT_MEM_COMPRESS:
331 			parse_memlimit("memlimit-compress",
332 					"memlimit-compress%", optarg,
333 					true, false, false);
334 			break;
335 
336 		// --memlimit-decompress
337 		case OPT_MEM_DECOMPRESS:
338 			parse_memlimit("memlimit-decompress",
339 					"memlimit-decompress%", optarg,
340 					false, true, false);
341 			break;
342 
343 		// --memlimit-mt-decompress
344 		case OPT_MEM_MT_DECOMPRESS:
345 			parse_memlimit("memlimit-mt-decompress",
346 					"memlimit-mt-decompress%", optarg,
347 					false, false, true);
348 			break;
349 
350 		// --memlimit
351 		case 'M':
352 			parse_memlimit("memlimit", "memlimit%", optarg,
353 					true, true, true);
354 			break;
355 
356 		// --suffix
357 		case 'S':
358 			suffix_set(optarg);
359 			break;
360 
361 		case 'T': {
362 			// Since xz 5.4.0: Ignore leading '+' first.
363 			const char *s = optarg;
364 			if (optarg[0] == '+')
365 				++s;
366 
367 			// The max is from src/liblzma/common/common.h.
368 			uint32_t t = str_to_uint64("threads", s, 0, 16384);
369 
370 			// If leading '+' was used then use multi-threaded
371 			// mode even if exactly one thread was specified.
372 			if (t == 1 && optarg[0] == '+')
373 				t = UINT32_MAX;
374 
375 			hardware_threads_set(t);
376 			break;
377 		}
378 
379 		// --version
380 		case 'V':
381 			// This doesn't return.
382 			message_version();
383 
384 		// --stdout
385 		case 'c':
386 			opt_stdout = true;
387 			break;
388 
389 		// --decompress
390 		case 'd':
391 			opt_mode = MODE_DECOMPRESS;
392 			break;
393 
394 		// --extreme
395 		case 'e':
396 			coder_set_extreme();
397 			break;
398 
399 		// --force
400 		case 'f':
401 			opt_force = true;
402 			break;
403 
404 		// --info-memory
405 		case OPT_INFO_MEMORY:
406 			// This doesn't return.
407 			hardware_memlimit_show();
408 
409 		// --help
410 		case 'h':
411 			// This doesn't return.
412 			message_help(false);
413 
414 		// --long-help
415 		case 'H':
416 			// This doesn't return.
417 			message_help(true);
418 
419 		// --list
420 		case 'l':
421 			opt_mode = MODE_LIST;
422 			break;
423 
424 		// --keep
425 		case 'k':
426 			opt_keep_original = true;
427 			break;
428 
429 		// --quiet
430 		case 'q':
431 			message_verbosity_decrease();
432 			break;
433 
434 		case 'Q':
435 			set_exit_no_warn();
436 			break;
437 
438 		case 't':
439 			opt_mode = MODE_TEST;
440 			break;
441 
442 		// --verbose
443 		case 'v':
444 			message_verbosity_increase();
445 			break;
446 
447 		// --robot
448 		case OPT_ROBOT:
449 			opt_robot = true;
450 
451 			// This is to make sure that floating point numbers
452 			// always have a dot as decimal separator.
453 			setlocale(LC_NUMERIC, "C");
454 			break;
455 
456 		case 'z':
457 			opt_mode = MODE_COMPRESS;
458 			break;
459 
460 		// --filters
461 		case OPT_FILTERS:
462 			coder_add_filters_from_str(optarg);
463 			break;
464 
465 		// --filters1...--filters9
466 		case OPT_FILTERS1:
467 		case OPT_FILTERS2:
468 		case OPT_FILTERS3:
469 		case OPT_FILTERS4:
470 		case OPT_FILTERS5:
471 		case OPT_FILTERS6:
472 		case OPT_FILTERS7:
473 		case OPT_FILTERS8:
474 		case OPT_FILTERS9:
475 			coder_add_block_filters(optarg,
476 					(size_t)(c - OPT_FILTERS));
477 			break;
478 
479 		// --filters-help
480 		case OPT_FILTERS_HELP:
481 			// This doesn't return.
482 			message_filters_help();
483 			break;
484 
485 		case OPT_X86:
486 			coder_add_filter(LZMA_FILTER_X86,
487 					options_bcj(optarg));
488 			break;
489 
490 		case OPT_POWERPC:
491 			coder_add_filter(LZMA_FILTER_POWERPC,
492 					options_bcj(optarg));
493 			break;
494 
495 		case OPT_IA64:
496 			coder_add_filter(LZMA_FILTER_IA64,
497 					options_bcj(optarg));
498 			break;
499 
500 		case OPT_ARM:
501 			coder_add_filter(LZMA_FILTER_ARM,
502 					options_bcj(optarg));
503 			break;
504 
505 		case OPT_ARMTHUMB:
506 			coder_add_filter(LZMA_FILTER_ARMTHUMB,
507 					options_bcj(optarg));
508 			break;
509 
510 		case OPT_ARM64:
511 			coder_add_filter(LZMA_FILTER_ARM64,
512 					options_bcj(optarg));
513 			break;
514 
515 		case OPT_SPARC:
516 			coder_add_filter(LZMA_FILTER_SPARC,
517 					options_bcj(optarg));
518 			break;
519 
520 		case OPT_RISCV:
521 			coder_add_filter(LZMA_FILTER_RISCV,
522 					options_bcj(optarg));
523 			break;
524 
525 		case OPT_DELTA:
526 			coder_add_filter(LZMA_FILTER_DELTA,
527 					options_delta(optarg));
528 			break;
529 
530 		case OPT_LZMA1:
531 			coder_add_filter(LZMA_FILTER_LZMA1,
532 					options_lzma(optarg));
533 			break;
534 
535 		case OPT_LZMA2:
536 			coder_add_filter(LZMA_FILTER_LZMA2,
537 					options_lzma(optarg));
538 			break;
539 
540 		// Other
541 
542 		// --format
543 		case 'F': {
544 			// Just in case, support both "lzma" and "alone" since
545 			// the latter was used for forward compatibility in
546 			// LZMA Utils 4.32.x.
547 			static const struct {
548 				char str[8];
549 				enum format_type format;
550 			} types[] = {
551 				{ "auto",   FORMAT_AUTO },
552 				{ "xz",     FORMAT_XZ },
553 				{ "lzma",   FORMAT_LZMA },
554 				{ "alone",  FORMAT_LZMA },
555 #ifdef HAVE_LZIP_DECODER
556 				{ "lzip",   FORMAT_LZIP },
557 #endif
558 				{ "raw",    FORMAT_RAW },
559 			};
560 
561 			size_t i = 0;
562 			while (strcmp(types[i].str, optarg) != 0)
563 				if (++i == ARRAY_SIZE(types))
564 					message_fatal(_("%s: Unknown file "
565 							"format type"),
566 							optarg);
567 
568 			opt_format = types[i].format;
569 			break;
570 		}
571 
572 		// --check
573 		case 'C': {
574 			static const struct {
575 				char str[8];
576 				lzma_check check;
577 			} types[] = {
578 				{ "none",   LZMA_CHECK_NONE },
579 				{ "crc32",  LZMA_CHECK_CRC32 },
580 				{ "crc64",  LZMA_CHECK_CRC64 },
581 				{ "sha256", LZMA_CHECK_SHA256 },
582 			};
583 
584 			size_t i = 0;
585 			while (strcmp(types[i].str, optarg) != 0) {
586 				if (++i == ARRAY_SIZE(types))
587 					message_fatal(_("%s: Unsupported "
588 							"integrity "
589 							"check type"), optarg);
590 			}
591 
592 			// Use a separate check in case we are using different
593 			// liblzma than what was used to compile us.
594 			if (!lzma_check_is_supported(types[i].check))
595 				message_fatal(_("%s: Unsupported integrity "
596 						"check type"), optarg);
597 
598 			coder_set_check(types[i].check);
599 			break;
600 		}
601 
602 		case OPT_IGNORE_CHECK:
603 			opt_ignore_check = true;
604 			break;
605 
606 		case OPT_BLOCK_SIZE:
607 			opt_block_size = str_to_uint64("block-size", optarg,
608 					0, LZMA_VLI_MAX);
609 			break;
610 
611 		case OPT_BLOCK_LIST: {
612 			parse_block_list(optarg);
613 			break;
614 		}
615 
616 		case OPT_SINGLE_STREAM:
617 			opt_single_stream = true;
618 
619 			// Since 5.7.1alpha --single-stream implies --keep.
620 			opt_keep_original = true;
621 			break;
622 
623 		case OPT_NO_SPARSE:
624 			io_no_sparse();
625 			break;
626 
627 		case OPT_FILES:
628 			args->files_delim = '\n';
629 
630 			FALLTHROUGH;
631 
632 		case OPT_FILES0:
633 			if (args->files_name != NULL)
634 				message_fatal(_("Only one file can be "
635 						"specified with '--files' "
636 						"or '--files0'."));
637 
638 			if (optarg == NULL) {
639 				args->files_name = stdin_filename;
640 				args->files_file = stdin;
641 			} else {
642 				args->files_name = optarg;
643 				args->files_file = fopen(optarg,
644 						c == OPT_FILES ? "r" : "rb");
645 				if (args->files_file == NULL)
646 					// TRANSLATORS: This is a translatable
647 					// string because French needs a space
648 					// before the colon ("%s : %s").
649 					message_fatal(_("%s: %s"), optarg,
650 							strerror(errno));
651 			}
652 
653 			break;
654 
655 		case OPT_NO_ADJUST:
656 			opt_auto_adjust = false;
657 			break;
658 
659 		case OPT_FLUSH_TIMEOUT:
660 			opt_flush_timeout = str_to_uint64("flush-timeout",
661 					optarg, 0, UINT64_MAX);
662 			break;
663 
664 		case OPT_NO_SYNC:
665 			opt_synchronous = false;
666 			break;
667 
668 		default:
669 			message_try_help();
670 			tuklib_exit(E_ERROR, E_ERROR, false);
671 		}
672 	}
673 
674 	return;
675 }
676 
677 
678 static void
parse_environment(args_info * args,char * argv0,const char * varname)679 parse_environment(args_info *args, char *argv0, const char *varname)
680 {
681 	char *env = getenv(varname);
682 	if (env == NULL)
683 		return;
684 
685 	// We modify the string, so make a copy of it.
686 	env = xstrdup(env);
687 
688 	// Calculate the number of arguments in env. argc stats at one
689 	// to include space for the program name.
690 	int argc = 1;
691 	bool prev_was_space = true;
692 	for (size_t i = 0; env[i] != '\0'; ++i) {
693 		// NOTE: Cast to unsigned char is needed so that correct
694 		// value gets passed to isspace(), which expects
695 		// unsigned char cast to int. Casting to int is done
696 		// automatically due to integer promotion, but we need to
697 		// force char to unsigned char manually. Otherwise 8-bit
698 		// characters would get promoted to wrong value if
699 		// char is signed.
700 		if (isspace((unsigned char)env[i])) {
701 			prev_was_space = true;
702 		} else if (prev_was_space) {
703 			prev_was_space = false;
704 
705 			// Keep argc small enough to fit into a signed int
706 			// and to keep it usable for memory allocation.
707 			if (++argc == my_min(
708 					INT_MAX, SIZE_MAX / sizeof(char *)))
709 				message_fatal(_("The environment variable "
710 						"%s contains too many "
711 						"arguments"), varname);
712 		}
713 	}
714 
715 	// Allocate memory to hold pointers to the arguments. Add one to get
716 	// space for the terminating NULL (if some systems happen to need it).
717 	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
718 	argv[0] = argv0;
719 	argv[argc] = NULL;
720 
721 	// Go through the string again. Split the arguments using '\0'
722 	// characters and add pointers to the resulting strings to argv.
723 	argc = 1;
724 	prev_was_space = true;
725 	for (size_t i = 0; env[i] != '\0'; ++i) {
726 		if (isspace((unsigned char)env[i])) {
727 			prev_was_space = true;
728 			env[i] = '\0';
729 		} else if (prev_was_space) {
730 			prev_was_space = false;
731 			argv[argc++] = env + i;
732 		}
733 	}
734 
735 	// Parse the argument list we got from the environment. All non-option
736 	// arguments i.e. filenames are ignored.
737 	parse_real(args, argc, argv);
738 
739 	// Reset the state of the getopt_long() so that we can parse the
740 	// command line options too. There are two incompatible ways to
741 	// do it.
742 #ifdef HAVE_OPTRESET
743 	// BSD
744 	optind = 1;
745 	optreset = 1;
746 #else
747 	// GNU, Solaris
748 	optind = 0;
749 #endif
750 
751 	// We don't need the argument list from environment anymore.
752 	free(argv);
753 	free(env);
754 
755 	return;
756 }
757 
758 
759 extern void
args_parse(args_info * args,int argc,char ** argv)760 args_parse(args_info *args, int argc, char **argv)
761 {
762 	// Initialize those parts of *args that we need later.
763 	args->files_name = NULL;
764 	args->files_file = NULL;
765 	args->files_delim = '\0';
766 
767 	// Check how we were called.
768 	{
769 		// Remove the leading path name, if any.
770 		const char *name = strrchr(argv[0], '/');
771 		if (name == NULL)
772 			name = argv[0];
773 		else
774 			++name;
775 
776 		// NOTE: It's possible that name[0] is now '\0' if argv[0]
777 		// is weird, but it doesn't matter here.
778 
779 		// Look for full command names instead of substrings like
780 		// "un", "cat", and "lz" to reduce possibility of false
781 		// positives when the programs have been renamed.
782 		if (strstr(name, "xzcat") != NULL) {
783 			opt_mode = MODE_DECOMPRESS;
784 			opt_stdout = true;
785 		} else if (strstr(name, "unxz") != NULL) {
786 			opt_mode = MODE_DECOMPRESS;
787 		} else if (strstr(name, "lzcat") != NULL) {
788 			opt_format = FORMAT_LZMA;
789 			opt_mode = MODE_DECOMPRESS;
790 			opt_stdout = true;
791 		} else if (strstr(name, "unlzma") != NULL) {
792 			opt_format = FORMAT_LZMA;
793 			opt_mode = MODE_DECOMPRESS;
794 		} else if (strstr(name, "lzma") != NULL) {
795 			opt_format = FORMAT_LZMA;
796 		}
797 	}
798 
799 	// First the flags from the environment
800 	parse_environment(args, argv[0], "XZ_DEFAULTS");
801 	parse_environment(args, argv[0], "XZ_OPT");
802 
803 	// Then from the command line
804 	parse_real(args, argc, argv);
805 
806 	// If encoder or decoder support was omitted at build time,
807 	// show an error now so that the rest of the code can rely on
808 	// that whatever is in opt_mode is also supported.
809 #ifndef HAVE_ENCODERS
810 	if (opt_mode == MODE_COMPRESS)
811 		message_fatal(_("Compression support was disabled "
812 				"at build time"));
813 #endif
814 #ifndef HAVE_DECODERS
815 	// Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
816 	// is the only valid choice.
817 	if (opt_mode != MODE_COMPRESS)
818 		message_fatal(_("Decompression support was disabled "
819 				"at build time"));
820 #endif
821 
822 #ifdef HAVE_LZIP_DECODER
823 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP)
824 		message_fatal(_("Compression of lzip files (.lz) "
825 				"is not supported"));
826 #endif
827 
828 	// Never remove the source file when the destination is not on disk.
829 	// In test mode the data is written nowhere, but setting opt_stdout
830 	// will make the rest of the code behave well.
831 	if (opt_stdout || opt_mode == MODE_TEST) {
832 		opt_keep_original = true;
833 		opt_stdout = true;
834 	}
835 
836 	// Don't use fsync() if --keep is specified or implied.
837 	// However, don't document this as "--keep implies --no-sync"
838 	// because if syncing support was added to --flush-timeout,
839 	// it would sync even if --keep was specified.
840 	if (opt_keep_original)
841 		opt_synchronous = false;
842 
843 	// When compressing, if no --format flag was used, or it
844 	// was --format=auto, we compress to the .xz format.
845 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
846 		opt_format = FORMAT_XZ;
847 
848 	// Set opt_block_list to NULL if we are not compressing to the .xz
849 	// format. This option cannot be used outside of this case, and
850 	// simplifies the implementation later.
851 	if ((opt_mode != MODE_COMPRESS || opt_format != FORMAT_XZ)
852 			&& opt_block_list != NULL) {
853 		message(V_WARNING, _("--block-list is ignored unless "
854 				"compressing to the .xz format"));
855 		free(opt_block_list);
856 		opt_block_list = NULL;
857 	}
858 
859 	// If raw format is used and a custom suffix is not provided,
860 	// then only stdout mode can be used when compressing or
861 	// decompressing.
862 	if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout
863 			&& (opt_mode == MODE_COMPRESS
864 				|| opt_mode == MODE_DECOMPRESS)) {
865 		if (args->files_name != NULL)
866 			message_fatal(_("With --format=raw, "
867 					"--suffix=.SUF is required "
868 					"unless writing to stdout"));
869 
870 		// If all of the filenames provided are "-" (more than one
871 		// "-" could be specified) or no filenames are provided,
872 		// then we are only going to be writing to standard out.
873 		for (int i = optind; i < argc; i++) {
874 			if (strcmp(argv[i], "-") != 0)
875 				message_fatal(_("With --format=raw, "
876 						"--suffix=.SUF is required "
877 						"unless writing to stdout"));
878 		}
879 	}
880 
881 	// Compression settings need to be validated (options themselves and
882 	// their memory usage) when compressing to any file format. It has to
883 	// be done also when uncompressing raw data, since for raw decoding
884 	// the options given on the command line are used to know what kind
885 	// of raw data we are supposed to decode.
886 	if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW
887 			&& opt_mode != MODE_LIST))
888 		coder_set_compression_settings();
889 
890 	// If no filenames are given, use stdin.
891 	if (argv[optind] == NULL && args->files_name == NULL) {
892 		// We don't modify or free() the "-" constant. The caller
893 		// modifies this so don't make the struct itself const.
894 		static char *names_stdin[2] = { (char *)"-", NULL };
895 		args->arg_names = names_stdin;
896 		args->arg_count = 1;
897 	} else {
898 		// We got at least one filename from the command line, or
899 		// --files or --files0 was specified.
900 		args->arg_names = argv + optind;
901 		args->arg_count = (unsigned int)(argc - optind);
902 	}
903 
904 	return;
905 }
906 
907 
908 #ifndef NDEBUG
909 extern void
args_free(void)910 args_free(void)
911 {
912 	free(opt_block_list);
913 	return;
914 }
915 #endif
916