xref: /freebsd/contrib/libarchive/libarchive/test/test_zip_filename_encoding.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 
27 #include <locale.h>
28 
DEFINE_TEST(test_zip_filename_encoding_UTF8)29 DEFINE_TEST(test_zip_filename_encoding_UTF8)
30 {
31   	struct archive *a;
32   	struct archive_entry *entry;
33 	char buff[4096];
34 	size_t used;
35 
36 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
37 		skipping("en_US.UTF-8 locale not available on this system.");
38 		return;
39 	}
40 
41 	/*
42 	 * Verify that UTF-8 filenames are correctly stored with
43 	 * hdrcharset=UTF-8 option.
44 	 */
45 	a = archive_write_new();
46 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
47 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
48 		skipping("This system cannot convert character-set"
49 		    " for UTF-8.");
50 		archive_write_free(a);
51 		return;
52 	}
53 	assertEqualInt(ARCHIVE_OK,
54 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
55 
56 	entry = archive_entry_new2(a);
57 	/* Set a UTF-8 filename. */
58 	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
59 	archive_entry_set_filetype(entry, AE_IFREG);
60 	archive_entry_set_size(entry, 0);
61 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
62 	archive_entry_free(entry);
63 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
64 
65 	/* A bit 11 of general purpose flag should be 0x08,
66 	 * which indicates the filename charset is UTF-8. */
67 	assertEqualInt(0x08, buff[7]);
68 	assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
69 
70 	/*
71 	 * Verify that UTF-8 filenames are correctly stored without
72 	 * hdrcharset=UTF-8 option.
73 	 * Skip on Windows where we default to OEMCP
74 	 */
75 #if !defined(_WIN32) || defined(__CYGWIN__)
76 	a = archive_write_new();
77 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
78 	assertEqualInt(ARCHIVE_OK,
79 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
80 
81 	entry = archive_entry_new2(a);
82 	/* Set a UTF-8 filename. */
83 	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
84 	archive_entry_set_filetype(entry, AE_IFREG);
85 	archive_entry_set_size(entry, 0);
86 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
87 	archive_entry_free(entry);
88 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
89 
90 	/* A bit 11 of general purpose flag should be 0x08,
91 	 * which indicates the filename charset is UTF-8. */
92 	assertEqualInt(0x08, buff[7]);
93 	assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
94 #endif
95 
96 	/*
97 	 * Verify that A bit 11 of general purpose flag is not set
98 	 * when ASCII filenames are stored.
99 	 */
100 	a = archive_write_new();
101 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
102 	assertEqualInt(ARCHIVE_OK,
103 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
104 
105 	entry = archive_entry_new2(a);
106 	/* Set an ASCII filename. */
107 	archive_entry_set_pathname(entry, "abcABC");
108 	archive_entry_set_filetype(entry, AE_IFREG);
109 	archive_entry_set_size(entry, 0);
110 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
111 	archive_entry_free(entry);
112 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
113 
114 	/* A bit 11 of general purpose flag should be 0,
115 	 * which indicates the filename charset is unknown. */
116 	assertEqualInt(0, buff[7]);
117 	assertEqualMem(buff + 30, "abcABC", 6);
118 }
119 
DEFINE_TEST(test_zip_filename_encoding_KOI8R)120 DEFINE_TEST(test_zip_filename_encoding_KOI8R)
121 {
122   	struct archive *a;
123   	struct archive_entry *entry;
124 	char buff[4096];
125 	size_t used;
126 
127 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
128 		skipping("KOI8-R locale not available on this system.");
129 		return;
130 	}
131 
132 	/*
133 	 * Verify that KOI8-R filenames are correctly translated to UTF-8.
134 	 */
135 	a = archive_write_new();
136 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
137 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
138 		skipping("This system cannot convert character-set"
139 		    " from KOI8-R to UTF-8.");
140 		archive_write_free(a);
141 		return;
142 	}
143 	assertEqualInt(ARCHIVE_OK,
144 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
145 
146 	entry = archive_entry_new2(a);
147 	/* Set a KOI8-R filename. */
148 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
149 	archive_entry_set_filetype(entry, AE_IFREG);
150 	archive_entry_set_size(entry, 0);
151 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
152 	archive_entry_free(entry);
153 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
154 
155 	/* A bit 11 of general purpose flag should be 0x08,
156 	 * which indicates the filename charset is UTF-8. */
157 	assertEqualInt(0x08, buff[7]);
158 	/* Above three characters in KOI8-R should translate to the following
159 	 * three characters (two bytes each) in UTF-8. */
160 	assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
161 
162 	/*
163 	 * Verify that KOI8-R filenames are not translated to UTF-8.
164 	 */
165 	a = archive_write_new();
166 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
167 	assertEqualInt(ARCHIVE_OK,
168 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
169 
170 	entry = archive_entry_new2(a);
171 	/* Set a KOI8-R filename. */
172 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
173 	archive_entry_set_filetype(entry, AE_IFREG);
174 	archive_entry_set_size(entry, 0);
175 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
176 	archive_entry_free(entry);
177 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
178 
179 	/* A bit 11 of general purpose flag should be 0,
180 	 * which indicates the filename charset is unknown. */
181 	assertEqualInt(0, buff[7]);
182 	/* Above three characters in KOI8-R should not translate to
183 	 * any character-set. */
184 	assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
185 
186 	/*
187 	 * Verify that A bit 11 of general purpose flag is not set
188 	 * when ASCII filenames are stored even if hdrcharset=UTF-8
189 	 * is specified.
190 	 */
191 	a = archive_write_new();
192 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
193 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
194 		skipping("This system cannot convert character-set"
195 		    " from KOI8-R to UTF-8.");
196 		archive_write_free(a);
197 		return;
198 	}
199 	assertEqualInt(ARCHIVE_OK,
200 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
201 
202 	entry = archive_entry_new2(a);
203 	/* Set an ASCII filename. */
204 	archive_entry_set_pathname(entry, "abcABC");
205 	archive_entry_set_filetype(entry, AE_IFREG);
206 	archive_entry_set_size(entry, 0);
207 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
208 	archive_entry_free(entry);
209 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
210 
211 	/* A bit 11 of general purpose flag should be 0,
212 	 * which indicates the filename charset is unknown. */
213 	assertEqualInt(0, buff[7]);
214 	assertEqualMem(buff + 30, "abcABC", 6);
215 }
216 
217 /*
218  * Do not translate CP1251 into CP866 if non Windows platform.
219  */
DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)220 DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)
221 {
222   	struct archive *a;
223   	struct archive_entry *entry;
224 	char buff[4096];
225 	size_t used;
226 
227 	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
228 		skipping("Russian_Russia locale not available on this system.");
229 		return;
230 	}
231 
232 	/*
233 	 * Verify that CP1251 filenames are not translated into any
234 	 * other character-set, in particular, CP866.
235 	 */
236 	a = archive_write_new();
237 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
238 	assertEqualInt(ARCHIVE_OK,
239 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
240 
241 	entry = archive_entry_new2(a);
242 	/* Set a CP1251 filename. */
243 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
244 	archive_entry_set_filetype(entry, AE_IFREG);
245 	archive_entry_set_size(entry, 0);
246 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
247 	archive_entry_free(entry);
248 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
249 
250 	/* A bit 11 of general purpose flag should be 0,
251 	 * which indicates the filename charset is unknown. */
252 	assertEqualInt(0, buff[7]);
253 	/* Above three characters in CP1251 should not translate into
254 	 * any other character-set. */
255 	assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
256 }
257 
258 /*
259  * Other archiver applications on Windows translate CP1251 filenames
260  * into CP866 filenames and store it in the zip file.
261  * Test above behavior works well.
262  */
DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)263 DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)
264 {
265   	struct archive *a;
266   	struct archive_entry *entry;
267 	char buff[4096];
268 	size_t used;
269 
270 	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
271 		skipping("Russian_Russia locale not available on this system.");
272 		return;
273 	}
274 
275 	/*
276 	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
277 	 * to UTF-8.
278 	 */
279 	a = archive_write_new();
280 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
281 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
282 		skipping("This system cannot convert character-set"
283 		    " from Russian_Russia.CP1251 to UTF-8.");
284 		archive_write_free(a);
285 		return;
286 	}
287 	assertEqualInt(ARCHIVE_OK,
288 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
289 
290 	entry = archive_entry_new2(a);
291 	/* Set a CP1251 filename. */
292 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
293 	archive_entry_set_filetype(entry, AE_IFREG);
294 	archive_entry_set_size(entry, 0);
295 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
296 	archive_entry_free(entry);
297 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
298 
299 	/* A bit 11 of general purpose flag should be 0x08,
300 	 * which indicates the filename charset is UTF-8. */
301 	assertEqualInt(0x08, buff[7]);
302 	/* Above three characters in CP1251 should translate to the following
303 	 * three characters (two bytes each) in UTF-8. */
304 	assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
305 
306 	/*
307 	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
308 	 * to CP866.
309 	 */
310 	a = archive_write_new();
311 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
312 	assertEqualInt(ARCHIVE_OK,
313 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
314 
315 	entry = archive_entry_new2(a);
316 	/* Set a CP1251 filename. */
317 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
318 	archive_entry_set_filetype(entry, AE_IFREG);
319 	archive_entry_set_size(entry, 0);
320 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
321 	archive_entry_free(entry);
322 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
323 
324 	/* A bit 11 of general purpose flag should be 0,
325 	 * which indicates the filename charset is unknown. */
326 	assertEqualInt(0, buff[7]);
327 	/* Above three characters in CP1251 should translate to the following
328 	 * three characters in CP866. */
329 	assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
330 }
331 
DEFINE_TEST(test_zip_filename_encoding_EUCJP)332 DEFINE_TEST(test_zip_filename_encoding_EUCJP)
333 {
334   	struct archive *a;
335   	struct archive_entry *entry;
336 	char buff[4096];
337 	size_t used;
338 
339 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
340 		skipping("eucJP locale not available on this system.");
341 		return;
342 	}
343 
344 	/*
345 	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
346 	 */
347 	a = archive_write_new();
348 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
349 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
350 		skipping("This system cannot convert character-set"
351 		    " from eucJP to UTF-8.");
352 		archive_write_free(a);
353 		return;
354 	}
355 	assertEqualInt(ARCHIVE_OK,
356 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
357 
358 	entry = archive_entry_new2(a);
359 	/* Set an EUC-JP filename. */
360 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
361 	/* Check the Unicode version. */
362 	archive_entry_set_filetype(entry, AE_IFREG);
363 	archive_entry_set_size(entry, 0);
364 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
365 	archive_entry_free(entry);
366 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
367 
368 	/* A bit 11 of general purpose flag should be 0x08,
369 	 * which indicates the filename charset is UTF-8. */
370 	assertEqualInt(0x08, buff[7]);
371 	/* Check UTF-8 version. */
372 	assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
373 
374 	/*
375 	 * Verify that EUC-JP filenames are not translated to UTF-8.
376 	 */
377 	a = archive_write_new();
378 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
379 	assertEqualInt(ARCHIVE_OK,
380 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
381 
382 	entry = archive_entry_new2(a);
383 	/* Set an EUC-JP filename. */
384 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
385 	/* Check the Unicode version. */
386 	archive_entry_set_filetype(entry, AE_IFREG);
387 	archive_entry_set_size(entry, 0);
388 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
389 	archive_entry_free(entry);
390 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
391 
392 	/* A bit 11 of general purpose flag should be 0,
393 	 * which indicates the filename charset is unknown. */
394 	assertEqualInt(0, buff[7]);
395 	/* Above three characters in EUC-JP should not translate to
396 	 * any character-set. */
397 	assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
398 
399 	/*
400 	 * Verify that A bit 11 of general purpose flag is not set
401 	 * when ASCII filenames are stored even if hdrcharset=UTF-8
402 	 * is specified.
403 	 */
404 	a = archive_write_new();
405 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
406 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
407 		skipping("This system cannot convert character-set"
408 		    " from eucJP to UTF-8.");
409 		archive_write_free(a);
410 		return;
411 	}
412 	assertEqualInt(ARCHIVE_OK,
413 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
414 
415 	entry = archive_entry_new2(a);
416 	/* Set an ASCII filename. */
417 	archive_entry_set_pathname(entry, "abcABC");
418 	/* Check the Unicode version. */
419 	archive_entry_set_filetype(entry, AE_IFREG);
420 	archive_entry_set_size(entry, 0);
421 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
422 	archive_entry_free(entry);
423 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
424 
425 	/* A bit 11 of general purpose flag should be 0,
426 	 * which indicates the filename charset is unknown. */
427 	assertEqualInt(0, buff[7]);
428 	assertEqualMem(buff + 30, "abcABC", 6);
429 }
430 
DEFINE_TEST(test_zip_filename_encoding_CP932)431 DEFINE_TEST(test_zip_filename_encoding_CP932)
432 {
433   	struct archive *a;
434   	struct archive_entry *entry;
435 	char buff[4096];
436 	size_t used;
437 
438 	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
439 	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
440 		skipping("CP932/SJIS locale not available on this system.");
441 		return;
442 	}
443 
444 	/*
445 	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
446 	 */
447 	a = archive_write_new();
448 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
449 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
450 		skipping("This system cannot convert character-set"
451 		    " from CP932/SJIS to UTF-8.");
452 		archive_write_free(a);
453 		return;
454 	}
455 	assertEqualInt(ARCHIVE_OK,
456 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
457 
458 	entry = archive_entry_new2(a);
459 	/* Set a CP932/SJIS filename. */
460 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
461 	/* Check the Unicode version. */
462 	archive_entry_set_filetype(entry, AE_IFREG);
463 	archive_entry_set_size(entry, 0);
464 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
465 	archive_entry_free(entry);
466 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
467 
468 	/* A bit 11 of general purpose flag should be 0x08,
469 	 * which indicates the filename charset is UTF-8. */
470 	assertEqualInt(0x08, buff[7]);
471 	/* Check UTF-8 version. */
472 	assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
473 
474 	/*
475 	 * Verify that CP932/SJIS filenames are not translated to UTF-8.
476 	 */
477 	a = archive_write_new();
478 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
479 	assertEqualInt(ARCHIVE_OK,
480 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
481 
482 	entry = archive_entry_new2(a);
483 	/* Set a CP932/SJIS filename. */
484 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
485 	/* Check the Unicode version. */
486 	archive_entry_set_filetype(entry, AE_IFREG);
487 	archive_entry_set_size(entry, 0);
488 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
489 	archive_entry_free(entry);
490 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
491 
492 	/* A bit 11 of general purpose flag should be 0,
493 	 * which indicates the filename charset is unknown. */
494 	assertEqualInt(0, buff[7]);
495 	/* Above three characters in CP932/SJIS should not translate to
496 	 * any character-set. */
497 	assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
498 
499 	/*
500 	 * Verify that A bit 11 of general purpose flag is not set
501 	 * when ASCII filenames are stored even if hdrcharset=UTF-8
502 	 * is specified.
503 	 */
504 	a = archive_write_new();
505 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
506 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
507 		skipping("This system cannot convert character-set"
508 		    " from CP932/SJIS to UTF-8.");
509 		archive_write_free(a);
510 		return;
511 	}
512 	assertEqualInt(ARCHIVE_OK,
513 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
514 
515 	entry = archive_entry_new2(a);
516 	/* Set an ASCII filename. */
517 	archive_entry_set_pathname(entry, "abcABC");
518 	/* Check the Unicode version. */
519 	archive_entry_set_filetype(entry, AE_IFREG);
520 	archive_entry_set_size(entry, 0);
521 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
522 	archive_entry_free(entry);
523 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
524 
525 	/* A bit 11 of general purpose flag should be 0,
526 	 * which indicates the filename charset is unknown. */
527 	assertEqualInt(0, buff[7]);
528 	assertEqualMem(buff + 30, "abcABC", 6);
529 }
530 
/*
 * Native-Windows-only test: names supplied through the wide-character
 * ('_w') entry setters must be written to the ZIP archive as UTF-8 when
 * hdrcharset=UTF-8 is set. Everywhere else (including Cygwin) the test
 * is reported as skipped.
 */
DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	/* Expected UTF-8 bytes for the wide names used below. */
	static const char utf8_file[] = "\xE8\xA1\xA8.txt";
	static const char utf8_dir[] = "\xE8\xA1\xA8/";
	struct archive *zip;
	struct archive_entry *ae;
	char buff[4096];
	size_t used;

	/*
	 * setlocale is deliberately not called: the point is to verify
	 * that the '_w' functions work as expected when 'hdrcharset'
	 * is UTF-8.
	 */

	/* Part 1: regular file named with a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	if (ARCHIVE_OK != archive_write_set_options(zip, "hdrcharset=UTF-8")) {
		skipping("This system cannot convert character-set"
		    " from UTF-16 to UTF-8.");
		archive_write_free(zip);
		return;
	}
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, buff, sizeof(buff), &used));

	ae = archive_entry_new2(zip);
	archive_entry_copy_pathname_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFREG);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/* Bit 11 of the general purpose flag (0x08 in this byte) must be
	 * set: the filename charset is UTF-8. */
	assertEqualInt(0x08, buff[7]);
	/* The stored name is the UTF-8 form. */
	assertEqualMem(buff + 30, utf8_file, 7);

	/* Part 2: directory named with a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	assertEqualInt(ARCHIVE_OK, archive_write_set_options(zip, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, buff, sizeof(buff), &used));

	ae = archive_entry_new2(zip);
	/* NOTE: the trailing slash is left off on purpose so that this
	 * code path is exercised; the check below expects the stored
	 * name to end in '/'. */
	archive_entry_copy_pathname_w(ae, L"\u8868");
	archive_entry_set_filetype(ae, AE_IFDIR);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/* UTF-8 flag set, as in part 1. */
	assertEqualInt(0x08, buff[7]);
	/* The stored name is the UTF-8 form followed by '/'. */
	assertEqualMem(buff+ 30, utf8_dir, 4);

	/* Part 3: ASCII-named symlink whose target is a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	assertEqualInt(ARCHIVE_OK, archive_write_set_options(zip, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, buff, sizeof(buff), &used));

	ae = archive_entry_new2(zip);
	archive_entry_set_pathname(ae, "link.txt");
	archive_entry_copy_symlink_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFLNK);
	archive_entry_set_symlink_type(ae, AE_SYMLINK_TYPE_FILE);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/* Flag byte is 0 here, because the entry's own name is ASCII. */
	assertEqualInt(0, buff[7]);
	/* The UTF-8 form of the target is expected at offset 38, i.e.
	 * right after the 8-byte name "link.txt" that starts at 30. */
	assertEqualMem(buff + 38, utf8_file, 7);

	/* NOTE: ZIP does not support hardlinks */
#else
	skipping("This test is meant to verify unicode string handling"
		" on Windows with UTF-16 names");
#endif
}
625