xref: /freebsd/contrib/libarchive/libarchive/test/test_read_format_tar_filename.c (revision b9128a37faafede823eb456aa65a11ac69997284)
1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 
27 #include <locale.h>
28 
/*
 * The sample tar file was made in LANG=KOI8-R and it contains two
 * files whose filenames are stored in different character sets:
 * - the filename of the first file is stored in BINARY mode.
 * - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by
 * default.
 */
38 
39 static void
40 test_read_format_tar_filename_KOI8R_CP866(const char *refname)
41 {
42 	struct archive *a;
43 	struct archive_entry *ae;
44 
45 	/*
46  	* Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
47  	* We should correctly read two filenames.
48 	*/
49 	if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
50 	    NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
51 		skipping("ru_RU.CP866 locale not available on this system.");
52 		return;
53 	}
54 
55 	/* Test if the platform can convert from UTF-8. */
56 	assert((a = archive_read_new()) != NULL);
57 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
58 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
59 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
60 		skipping("This system cannot convert character-set"
61 		    " from UTF-8 to CP866.");
62 		return;
63 	}
64 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
65 
66 	assert((a = archive_read_new()) != NULL);
67 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
68 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
69 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
70 		skipping("This system cannot convert character-set"
71 		    " from KOI8-R to CP866.");
72 		goto next_test;
73 	}
74 	assertEqualIntA(a, ARCHIVE_OK,
75 	    archive_read_open_filename(a, refname, 10240));
76 
77 	/* Verify regular first file. */
78 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
79 	assertEqualString("\x8f\x90\x88\x82\x85\x92",
80 	    archive_entry_pathname(ae));
81 	assertEqualInt(6, archive_entry_size(ae));
82 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
83 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
84 
85 	/* Verify regular second file. */
86 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
87 	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
88 	    archive_entry_pathname(ae));
89 	assertEqualInt(6, archive_entry_size(ae));
90 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
91 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
92 
93 
94 	/* End of archive. */
95 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
96 
97 	/* Verify archive format. */
98 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
99 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
100 	    archive_format(a));
101 
102 	/* Close the archive. */
103 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
104 next_test:
105 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
106 
107 
108 	/*
109 	 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
110 	 * The filename we can properly read is only second file.
111 	 */
112 
113 	assert((a = archive_read_new()) != NULL);
114 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
115 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
116 	assertEqualIntA(a, ARCHIVE_OK,
117 	    archive_read_open_filename(a, refname, 10240));
118 
119 	/*
120 	 * Verify regular first file.
121 	 * The filename is not translated to CP866 because hdrcharset
122 	 * attribute is BINARY and there is not way to know its charset.
123 	 */
124 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
125 	/* A filename is in KOI8-R. */
126 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
127 	    archive_entry_pathname(ae));
128 	assertEqualInt(6, archive_entry_size(ae));
129 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
130 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
131 
132 	/*
133 	 * Verify regular second file.
134 	 * The filename is translated from UTF-8 to CP866
135 	 */
136 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
137 	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
138 	    archive_entry_pathname(ae));
139 	assertEqualInt(6, archive_entry_size(ae));
140 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
141 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
142 
143 
144 	/* End of archive. */
145 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
146 
147 	/* Verify archive format. */
148 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
149 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
150 	    archive_format(a));
151 
152 	/* Close the archive. */
153 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
154 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
155 }
156 
157 static void
158 test_read_format_tar_filename_KOI8R_UTF8(const char *refname)
159 {
160 	struct archive *a;
161 	struct archive_entry *ae;
162 
163 	/*
164 	 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
165 	 * We should correctly read two filenames.
166 	 */
167 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
168 		skipping("en_US.UTF-8 locale not available on this system.");
169 		return;
170 	}
171 
172 	assert((a = archive_read_new()) != NULL);
173 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
174 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
175 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
176 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
177 		skipping("This system cannot convert character-set"
178 		    " from KOI8-R to UTF-8.");
179 		return;
180 	}
181 	assertEqualIntA(a, ARCHIVE_OK,
182 	    archive_read_open_filename(a, refname, 10240));
183 
184 	/* Verify regular file. */
185 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
186 	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
187 	    archive_entry_pathname(ae));
188 	assertEqualInt(6, archive_entry_size(ae));
189 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
190 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
191 
192 	/* Verify regular file. */
193 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
194 	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
195 	    archive_entry_pathname(ae));
196 	assertEqualInt(6, archive_entry_size(ae));
197 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
198 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
199 
200 	/* Verify encryption status */
201 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
202 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
203 
204 	/* End of archive. */
205 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
206 
207 	/* Verify archive format. */
208 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
209 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
210 	    archive_format(a));
211 
212 	/* Verify encryption status */
213 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
214 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
215 
216 	/* Close the archive. */
217 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
218 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
219 
220 	/*
221 	 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
222 	 * The filename we can properly read is only second file.
223 	 */
224 
225 	assert((a = archive_read_new()) != NULL);
226 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
227 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
228 	assertEqualIntA(a, ARCHIVE_OK,
229 	    archive_read_open_filename(a, refname, 10240));
230 
231 	/*
232 	 * Verify regular first file.
233 	 * The filename is not translated to UTF-8 because hdrcharset
234 	 * attribute is BINARY and there is not way to know its charset.
235 	 */
236 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
237 	/* A filename is in KOI8-R. */
238 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
239 	    archive_entry_pathname(ae));
240 	assertEqualInt(6, archive_entry_size(ae));
241 
242 	/* Verify encryption status */
243 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
244 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
245 
246 	/*
247 	 * Verify regular second file.
248 	 */
249 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
250 	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
251 	    archive_entry_pathname(ae));
252 	assertEqualInt(6, archive_entry_size(ae));
253 
254 
255 	/* End of archive. */
256 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
257 
258 	/* Verify archive format. */
259 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
260 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
261 	    archive_format(a));
262 
263 	/* Close the archive. */
264 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
265 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
266 }
267 
268 static void
269 test_read_format_tar_filename_KOI8R_CP1251(const char *refname)
270 {
271 	struct archive *a;
272 	struct archive_entry *ae;
273 
274 	/*
275  	* Read filename in CP1251 with "hdrcharset=KOI8-R" option.
276  	* We should correctly read two filenames.
277 	*/
278 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
279 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
280 		skipping("CP1251 locale not available on this system.");
281 		return;
282 	}
283 
284 	/* Test if the platform can convert from UTF-8. */
285 	assert((a = archive_read_new()) != NULL);
286 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
287 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
288 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
289 		skipping("This system cannot convert character-set"
290 		    " from UTF-8 to CP1251.");
291 		return;
292 	}
293 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
294 
295 	assert((a = archive_read_new()) != NULL);
296 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
297 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
298 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
299 		skipping("This system cannot convert character-set"
300 		    " from KOI8-R to CP1251.");
301 		goto next_test;
302 	}
303 	assertEqualIntA(a, ARCHIVE_OK,
304 	    archive_read_open_filename(a, refname, 10240));
305 
306 	/* Verify regular first file. */
307 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
308 	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
309 	    archive_entry_pathname(ae));
310 	assertEqualInt(6, archive_entry_size(ae));
311 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
312 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
313 
314 	/* Verify regular second file. */
315 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
316 	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
317 	    archive_entry_pathname(ae));
318 	assertEqualInt(6, archive_entry_size(ae));
319 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
320 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
321 
322 
323 	/* End of archive. */
324 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
325 
326 	/* Verify archive format. */
327 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
328 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
329 	    archive_format(a));
330 
331 	/* Close the archive. */
332 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
333 next_test:
334 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
335 
336 	/*
337 	 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
338 	 * The filename we can properly read is only second file.
339 	 */
340 
341 	assert((a = archive_read_new()) != NULL);
342 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
343 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
344 	assertEqualIntA(a, ARCHIVE_OK,
345 	    archive_read_open_filename(a, refname, 10240));
346 
347 	/*
348 	 * Verify regular first file.
349 	 * The filename is not translated to CP1251 because hdrcharset
350 	 * attribute is BINARY and there is not way to know its charset.
351 	 */
352 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
353 	/* A filename is in KOI8-R. */
354 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
355 	    archive_entry_pathname(ae));
356 	assertEqualInt(6, archive_entry_size(ae));
357 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
358 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
359 
360 	/*
361 	 * Verify regular second file.
362 	 */
363 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
364 	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
365 	    archive_entry_pathname(ae));
366 	assertEqualInt(6, archive_entry_size(ae));
367 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
368 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
369 
370 
371 	/* End of archive. */
372 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
373 
374 	/* Verify archive format. */
375 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
376 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
377 	    archive_format(a));
378 
379 	/* Close the archive. */
380 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
381 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
382 }
383 
384 
385 DEFINE_TEST(test_read_format_tar_filename)
386 {
387 	const char *refname = "test_read_format_tar_filename_koi8r.tar.Z";
388 
389 	extract_reference_file(refname);
390 	test_read_format_tar_filename_KOI8R_CP866(refname);
391 	test_read_format_tar_filename_KOI8R_UTF8(refname);
392 	test_read_format_tar_filename_KOI8R_CP1251(refname);
393 }
394