xref: /freebsd/contrib/libarchive/libarchive/test/test_read_format_lha_filename_utf16.c (revision b9128a37faafede823eb456aa65a11ac69997284)
1 /*-
2  * Copyright (c) 2019 Martin Matuska
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 
27 #include <locale.h>
28 
29 static void
30 test_read_format_lha_filename_UTF16_UTF8(const char *refname)
31 {
32 	struct archive *a;
33 	struct archive_entry *ae;
34 
35 	/*
36 	 * Read LHA filename in en_US.UTF-8.
37 	 */
38 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
39 		skipping("en_US.UTF-8 locale not available on this system.");
40 		return;
41 	}
42 	/*
43 	 * Create a read object only for a test that platform support
44 	 * a character-set conversion because we can read a character-set
45 	 * of filenames from the header of an lha archive file and so we
46 	 * want to test that it works well.
47 	 */
48 	assert((a = archive_read_new()) != NULL);
49 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
50     if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=CP932")) {
51         assertEqualInt(ARCHIVE_OK, archive_read_free(a));
52         skipping("This system cannot convert character-set"
53             " from CP932 to UTF-8.");
54         return;
55     }
56 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-16")) {
57 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
58 		skipping("This system cannot convert character-set"
59 		    " from UTF-16 to UTF-8.");
60 		return;
61 	}
62 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
63 	assert((a = archive_read_new()) != NULL);
64 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
65 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
66 	assertEqualIntA(a, ARCHIVE_OK,
67 	    archive_read_open_filename(a, refname, 10240));
68 
69 	/* Note that usual Japanese filenames are tested in other cases */
70 #if defined(__APPLE__)
71  /* NFD normalization */
72  /* U:O:A:u:o:a: */
73  #define UMLAUT_DIRNAME "\x55\xcc\x88\x4f\xcc\x88\x41\xcc\x88\x75\xcc\x88\x6f"\
74 	    "\xcc\x88\x61\xcc\x88/"
75  /* a:o:u:A:O:U:.txt */
76  #define UMLAUT_FNAME "\x61\xcc\x88\x6f\xcc\x88\x75\xcc\x88\x41\xcc\x88"\
77 	    "\x4f\xcc\x88\x55\xcc\x88.txt"
78 #else
79  /* NFC normalization */
80  /* U:O:A:u:o:a: */
81  #define UMLAUT_DIRNAME "\xc3\x9c\xc3\x96\xc3\x84\xc3\xbc\xc3\xb6\xc3\xa4/"
82  /* a:o:u:A:O:U:.txt */
83  #define UMLAUT_FNAME "\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c.txt"
84 #endif
85 
86 /* "Test" in Japanese Katakana */
87 #define KATAKANA_FNAME "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88.txt"
88 #define KATAKANA_DIRNAME "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88/"
89 
90 	/* Verify regular file. U:O:A:u:o:a:/a:o:u:A:O:U:.txt */
91 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
92 	assertEqualString(UMLAUT_DIRNAME UMLAUT_FNAME, archive_entry_pathname(ae));
93 	assertEqualInt(12, archive_entry_size(ae));
94 
95 	/* Verify directory. U:O:A:u:o:a:/ */
96 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
97 	assertEqualString(UMLAUT_DIRNAME, archive_entry_pathname(ae));
98 	assertEqualInt(0, archive_entry_size(ae));
99 
100 	/* Verify regular file. U:O:A:u:o:a:/("Test" in Japanese).txt */
101 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
102 	assertEqualString(UMLAUT_DIRNAME KATAKANA_FNAME,
103 	    archive_entry_pathname(ae));
104 	assertEqualInt(25, archive_entry_size(ae));
105 
106 	/* Verify regular file. ("Test" in Japanese)/a:o:u:A:O:U:.txt */
107 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
108 	assertEqualString(KATAKANA_DIRNAME UMLAUT_FNAME,
109 	    archive_entry_pathname(ae));
110 	assertEqualInt(12, archive_entry_size(ae));
111 
112 	/* Verify directory. ("Test" in Japanese)/ */
113 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
114 	assertEqualString(KATAKANA_DIRNAME, archive_entry_pathname(ae));
115 	assertEqualInt(0, archive_entry_size(ae));
116 
117 	/* Verify regular file. a:o:u:A:O:U:.txt */
118 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
119 	assertEqualString(UMLAUT_FNAME, archive_entry_pathname(ae));
120 	assertEqualInt(12, archive_entry_size(ae));
121 
122 	/* End of archive. */
123 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
124 
125 	/* Verify archive format. */
126 	assertEqualIntA(a, ARCHIVE_FILTER_NONE, archive_filter_code(a, 0));
127 	assertEqualIntA(a, ARCHIVE_FORMAT_LHA, archive_format(a));
128 
129 	/* Close the archive. */
130 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
131 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
132 }
133 
/*
 * Test entry point: extract the reference archive and run the
 * UTF-16 filename check against it.
 */
DEFINE_TEST(test_read_format_lha_filename_UTF16)
{
	/* Reference archive; the sample was created with Unlha32.dll. */
	const char *archive_name = "test_read_format_lha_filename_utf16.lzh";

	extract_reference_file(archive_name);
	test_read_format_lha_filename_UTF16_UTF8(archive_name);
}
142 
143