xref: /freebsd/contrib/xz/src/xz/suffix.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       suffix.c
/// \brief      Checks filename suffix and creates the destination filename
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
11 
12 #include "private.h"
13 
14 #ifdef __DJGPP__
15 #	include <fcntl.h>
16 #endif
17 
// For case-insensitive filename suffix on case-insensitive systems
//
// suffix_strcmp is the string comparison used for all suffix matching in
// this file. It maps to a case-insensitive comparison when TUKLIB_DOSLIKE
// or __VMS is defined and to plain strcmp() otherwise.
#if defined(TUKLIB_DOSLIKE) || defined(__VMS)
#	ifdef HAVE_STRINGS_H
#		include <strings.h>
#	endif
#	ifdef _MSC_VER
#		define suffix_strcmp _stricmp
#	else
#		define suffix_strcmp strcasecmp
#	endif
#else
#	define suffix_strcmp strcmp
#endif
31 
32 
/// Suffix set with suffix_set(), or NULL if no custom suffix has been set.
/// The string is heap-allocated and owned by this file.
static char *custom_suffix = NULL;
34 
35 
/// \brief      Test if the char is a directory separator
///
/// \param      c       Character to test
///
/// \return     True if c is '/'. When TUKLIB_DOSLIKE is defined,
///             '\\' and ':' are treated as separators too.
static bool
is_dir_sep(char c)
{
#ifdef TUKLIB_DOSLIKE
	return c == '/' || c == '\\' || c == ':';
#else
	return c == '/';
#endif
}
46 
47 
/// \brief      Test if the string contains a directory separator
///
/// \param      str     Null-terminated string to scan
///
/// \return     True if any character of str is '/'. When TUKLIB_DOSLIKE
///             is defined, '\\' and ':' count as separators too.
static bool
has_dir_sep(const char *str)
{
#ifdef TUKLIB_DOSLIKE
	return strpbrk(str, "/\\:") != NULL;
#else
	return strchr(str, '/') != NULL;
#endif
}
58 
59 
#ifdef __DJGPP__
/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
///
/// \param      str     Filename, possibly including a path
/// \param      len     Length of str (callers in this file pass strlen(str))
///
/// \return     If str matches *.?- or *.??-, true is returned. Otherwise
///             false is returned.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// The name must end with '-' and the character before the dash
	// must be part of the extension: neither the dot itself nor
	// a directory separator.
	if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.'
			&& !is_dir_sep(str[len - 2])) {
		// *.?-
		// The character before the dot must not be a directory
		// separator, that is, there must be something before
		// the extension.
		if (str[len - 3] == '.')
			return !is_dir_sep(str[len - 4]);

		// *.??-
		if (len >= 5 && !is_dir_sep(str[len - 3])
				&& str[len - 4] == '.')
			return !is_dir_sep(str[len - 5]);
	}

	return false;
}
#endif
83 
84 
/// \brief      Checks if src_name has given compressed_suffix
///
/// The comparison is done with suffix_strcmp, so whether it is
/// case-sensitive depends on how that macro was selected.
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename
/// \param      src_len     strlen(src_name)
///
/// \return     If src_name has the suffix, src_len - strlen(suffix) is
///             returned. It's always a positive integer. Otherwise zero
///             is returned.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// The filename must have at least one character in addition to
	// the suffix. src_name may contain path to the filename, so we
	// need to check for directory separator too.
	//
	// The length check comes first so that the index
	// src_len - suffix_len - 1 cannot wrap around.
	if (src_len <= suffix_len
			|| is_dir_sep(src_name[src_len - suffix_len - 1]))
		return 0;

	if (suffix_strcmp(suffix, src_name + src_len - suffix_len) == 0)
		return src_len - suffix_len;

	return 0;
}
111 
112 
113 /// \brief      Removes the filename suffix of the compressed file
114 ///
115 /// \return     Name of the uncompressed file, or NULL if file has unknown
116 ///             suffix.
117 static char *
uncompressed_name(const char * src_name,const size_t src_len)118 uncompressed_name(const char *src_name, const size_t src_len)
119 {
120 	static const struct {
121 		const char *compressed;
122 		const char *uncompressed;
123 	} suffixes[] = {
124 		{ ".xz",    "" },
125 		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
126 		{ ".lzma",  "" },
127 #ifdef __DJGPP__
128 		{ ".lzm",   "" },
129 #endif
130 		{ ".tlz",   ".tar" }, // Both .tar.lzma and .tar.lz
131 #ifdef HAVE_LZIP_DECODER
132 		{ ".lz",    "" },
133 #endif
134 	};
135 
136 	const char *new_suffix = "";
137 	size_t new_len = 0;
138 
139 	if (opt_format != FORMAT_RAW) {
140 		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
141 			new_len = test_suffix(suffixes[i].compressed,
142 					src_name, src_len);
143 			if (new_len != 0) {
144 				new_suffix = suffixes[i].uncompressed;
145 				break;
146 			}
147 		}
148 
149 #ifdef __DJGPP__
150 		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
151 		// This is done also when long filenames are available
152 		// to keep it easy to decompress files created when
153 		// long filename support wasn't available.
154 		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
155 			new_suffix = "";
156 			new_len = src_len - 1;
157 		}
158 #endif
159 	}
160 
161 	if (new_len == 0 && custom_suffix != NULL)
162 		new_len = test_suffix(custom_suffix, src_name, src_len);
163 
164 	if (new_len == 0) {
165 		message_warning(_("%s: Filename has an unknown suffix, "
166 				"skipping"), tuklib_mask_nonprint(src_name));
167 		return NULL;
168 	}
169 
170 	const size_t new_suffix_len = strlen(new_suffix);
171 	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
172 
173 	memcpy(dest_name, src_name, new_len);
174 	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
175 	dest_name[new_len + new_suffix_len] = '\0';
176 
177 	return dest_name;
178 }
179 
180 
/// \brief      Warn that a file already has a compressed suffix
///
/// \param      src_name    Input filename shown in the message
/// \param      suffix      The suffix that was found in src_name
static void
msg_suffix(const char *src_name, const char *suffix)
{
	// The suffix is masked via tuklib_mask_nonprint_r() with its own
	// buffer pointer, presumably so that the two masked strings don't
	// share storage within the same message. The buffer is released
	// once the message has been emitted.
	char *mem = NULL;
	message_warning(_("%s: File already has '%s' suffix, skipping"),
			tuklib_mask_nonprint(src_name),
			tuklib_mask_nonprint_r(suffix, &mem));
	free(mem);
	return;
}
191 
192 
193 /// \brief      Appends suffix to src_name
194 ///
195 /// In contrast to uncompressed_name(), we check only suffixes that are valid
196 /// for the specified file format.
197 static char *
compressed_name(const char * src_name,size_t src_len)198 compressed_name(const char *src_name, size_t src_len)
199 {
200 	// The order of these must match the order in args.h.
201 	static const char *const all_suffixes[][4] = {
202 		{
203 			".xz",
204 			".txz",
205 			NULL
206 		}, {
207 			".lzma",
208 #ifdef __DJGPP__
209 			".lzm",
210 #endif
211 			".tlz",
212 			NULL
213 #ifdef HAVE_LZIP_DECODER
214 		// This is needed to keep the table indexing in sync with
215 		// enum format_type from coder.h.
216 		}, {
217 /*
218 			".lz",
219 */
220 			NULL
221 #endif
222 		}, {
223 			// --format=raw requires specifying the suffix
224 			// manually or using stdout.
225 			NULL
226 		}
227 	};
228 
229 	// args.c ensures these.
230 	assert(opt_format != FORMAT_AUTO);
231 #ifdef HAVE_LZIP_DECODER
232 	assert(opt_format != FORMAT_LZIP);
233 #endif
234 
235 	const size_t format = opt_format - 1;
236 	const char *const *suffixes = all_suffixes[format];
237 
238 	// Look for known filename suffixes and refuse to compress them.
239 	for (size_t i = 0; suffixes[i] != NULL; ++i) {
240 		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
241 			msg_suffix(src_name, suffixes[i]);
242 			return NULL;
243 		}
244 	}
245 
246 #ifdef __DJGPP__
247 	// Recognize also the special suffix that is used when long
248 	// filename (LFN) support isn't available. This suffix is
249 	// recognized on LFN systems too.
250 	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
251 		msg_suffix(src_name, "-");
252 		return NULL;
253 	}
254 #endif
255 
256 	if (custom_suffix != NULL) {
257 		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
258 			msg_suffix(src_name, custom_suffix);
259 			return NULL;
260 		}
261 	}
262 
263 	const char *suffix = custom_suffix != NULL
264 			? custom_suffix : suffixes[0];
265 	size_t suffix_len = strlen(suffix);
266 
267 #ifdef __DJGPP__
268 	if (!_use_lfn(src_name)) {
269 		// Long filename (LFN) support isn't available and we are
270 		// limited to 8.3 short filenames (SFN).
271 		//
272 		// Look for suffix separator from the filename, and make sure
273 		// that it is in the filename, not in a directory name.
274 		const char *sufsep = strrchr(src_name, '.');
275 		if (sufsep == NULL || sufsep[1] == '\0'
276 				|| has_dir_sep(sufsep)) {
277 			// src_name has no filename extension.
278 			//
279 			// Examples:
280 			// xz foo         -> foo.xz
281 			// xz -F lzma foo -> foo.lzm
282 			// xz -S x foo    -> foox
283 			// xz -S x foo.   -> foo.x
284 			// xz -S x.y foo  -> foox.y
285 			// xz -S .x foo   -> foo.x
286 			// xz -S .x foo.  -> foo.x
287 			//
288 			// Avoid double dots:
289 			if (sufsep != NULL && sufsep[1] == '\0'
290 					&& suffix[0] == '.')
291 				--src_len;
292 
293 		} else if (custom_suffix == NULL
294 				&& strcasecmp(sufsep, ".tar") == 0) {
295 			// ".tar" is handled specially.
296 			//
297 			// Examples:
298 			// xz foo.tar          -> foo.txz
299 			// xz -F lzma foo.tar  -> foo.tlz
300 			static const char *const tar_suffixes[] = {
301 				".txz", // .tar.xz
302 				".tlz", // .tar.lzma
303 /*
304 				".tlz", // .tar.lz
305 */
306 			};
307 			suffix = tar_suffixes[format];
308 			suffix_len = 4;
309 			src_len -= 4;
310 
311 		} else {
312 			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
313 				// Instead of the .xz suffix, use a single
314 				// character at the end of the filename
315 				// extension. This is to minimize name
316 				// conflicts when compressing multiple files
317 				// with the same basename. E.g. foo.txt and
318 				// foo.exe become foo.tx- and foo.ex-. Dash
319 				// is rare as the last character of the
320 				// filename extension, so it seems to be
321 				// quite safe choice and it stands out better
322 				// in directory listings than e.g. x. For
323 				// comparison, gzip uses z.
324 				suffix = "-";
325 				suffix_len = 1;
326 			}
327 
328 			if (suffix[0] == '.') {
329 				// The first character of the suffix is a dot.
330 				// Throw away the original filename extension
331 				// and replace it with the new suffix.
332 				//
333 				// Examples:
334 				// xz -F lzma foo.txt  -> foo.lzm
335 				// xz -S .x  foo.txt   -> foo.x
336 				src_len = sufsep - src_name;
337 
338 			} else {
339 				// The first character of the suffix is not
340 				// a dot. Preserve the first 0-2 characters
341 				// of the original filename extension.
342 				//
343 				// Examples:
344 				// xz foo.txt         -> foo.tx-
345 				// xz -S x  foo.c     -> foo.cx
346 				// xz -S ab foo.c     -> foo.cab
347 				// xz -S ab foo.txt   -> foo.tab
348 				// xz -S abc foo.txt  -> foo.abc
349 				//
350 				// Truncate the suffix to three chars:
351 				if (suffix_len > 3)
352 					suffix_len = 3;
353 
354 				// If needed, overwrite 1-3 characters.
355 				if (strlen(sufsep) > 4 - suffix_len)
356 					src_len = sufsep - src_name
357 							+ 4 - suffix_len;
358 			}
359 		}
360 	}
361 #endif
362 
363 	char *dest_name = xmalloc(src_len + suffix_len + 1);
364 
365 	memcpy(dest_name, src_name, src_len);
366 	memcpy(dest_name + src_len, suffix, suffix_len);
367 	dest_name[src_len + suffix_len] = '\0';
368 
369 	return dest_name;
370 }
371 
372 
373 extern char *
suffix_get_dest_name(const char * src_name)374 suffix_get_dest_name(const char *src_name)
375 {
376 	assert(src_name != NULL);
377 
378 	// Length of the name is needed in all cases to locate the end of
379 	// the string to compare the suffix, so calculate the length here.
380 	const size_t src_len = strlen(src_name);
381 
382 	return opt_mode == MODE_COMPRESS
383 			? compressed_name(src_name, src_len)
384 			: uncompressed_name(src_name, src_len);
385 }
386 
387 
388 extern void
suffix_set(const char * suffix)389 suffix_set(const char *suffix)
390 {
391 	// Empty suffix and suffixes having a directory separator are
392 	// rejected. Such suffixes would break things later.
393 	if (suffix[0] == '\0' || has_dir_sep(suffix))
394 		message_fatal(_("%s: Invalid filename suffix"),
395 				tuklib_mask_nonprint(suffix));
396 
397 	// Replace the old custom_suffix (if any) with the new suffix.
398 	free(custom_suffix);
399 	custom_suffix = xstrdup(suffix);
400 	return;
401 }
402 
403 
404 extern bool
suffix_is_set(void)405 suffix_is_set(void)
406 {
407 	return custom_suffix != NULL;
408 }
409