1 /*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 #include "test.h"
26
27 #include <locale.h>
28
DEFINE_TEST(test_zip_filename_encoding_UTF8)29 DEFINE_TEST(test_zip_filename_encoding_UTF8)
30 {
31 struct archive *a;
32 struct archive_entry *entry;
33 char buff[4096];
34 size_t used;
35
36 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
37 skipping("en_US.UTF-8 locale not available on this system.");
38 return;
39 }
40
41 /*
42 * Verify that UTF-8 filenames are correctly stored with
43 * hdrcharset=UTF-8 option.
44 */
45 a = archive_write_new();
46 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
47 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
48 skipping("This system cannot convert character-set"
49 " for UTF-8.");
50 archive_write_free(a);
51 return;
52 }
53 assertEqualInt(ARCHIVE_OK,
54 archive_write_open_memory(a, buff, sizeof(buff), &used));
55
56 entry = archive_entry_new2(a);
57 /* Set a UTF-8 filename. */
58 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
59 archive_entry_set_filetype(entry, AE_IFREG);
60 archive_entry_set_size(entry, 0);
61 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
62 archive_entry_free(entry);
63 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
64
65 /* A bit 11 of general purpose flag should be 0x08,
66 * which indicates the filename charset is UTF-8. */
67 assertEqualInt(0x08, buff[7]);
68 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
69
70 /*
71 * Verify that UTF-8 filenames are correctly stored without
72 * hdrcharset=UTF-8 option.
73 * Skip on Windows where we default to OEMCP
74 */
75 #if !defined(_WIN32) || defined(__CYGWIN__)
76 a = archive_write_new();
77 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
78 assertEqualInt(ARCHIVE_OK,
79 archive_write_open_memory(a, buff, sizeof(buff), &used));
80
81 entry = archive_entry_new2(a);
82 /* Set a UTF-8 filename. */
83 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
84 archive_entry_set_filetype(entry, AE_IFREG);
85 archive_entry_set_size(entry, 0);
86 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
87 archive_entry_free(entry);
88 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
89
90 /* A bit 11 of general purpose flag should be 0x08,
91 * which indicates the filename charset is UTF-8. */
92 assertEqualInt(0x08, buff[7]);
93 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
94 #endif
95
96 /*
97 * Verify that A bit 11 of general purpose flag is not set
98 * when ASCII filenames are stored.
99 */
100 a = archive_write_new();
101 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
102 assertEqualInt(ARCHIVE_OK,
103 archive_write_open_memory(a, buff, sizeof(buff), &used));
104
105 entry = archive_entry_new2(a);
106 /* Set an ASCII filename. */
107 archive_entry_set_pathname(entry, "abcABC");
108 archive_entry_set_filetype(entry, AE_IFREG);
109 archive_entry_set_size(entry, 0);
110 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
111 archive_entry_free(entry);
112 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
113
114 /* A bit 11 of general purpose flag should be 0,
115 * which indicates the filename charset is unknown. */
116 assertEqualInt(0, buff[7]);
117 assertEqualMem(buff + 30, "abcABC", 6);
118 }
119
DEFINE_TEST(test_zip_filename_encoding_KOI8R)120 DEFINE_TEST(test_zip_filename_encoding_KOI8R)
121 {
122 struct archive *a;
123 struct archive_entry *entry;
124 char buff[4096];
125 size_t used;
126
127 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
128 skipping("KOI8-R locale not available on this system.");
129 return;
130 }
131
132 /*
133 * Verify that KOI8-R filenames are correctly translated to UTF-8.
134 */
135 a = archive_write_new();
136 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
137 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
138 skipping("This system cannot convert character-set"
139 " from KOI8-R to UTF-8.");
140 archive_write_free(a);
141 return;
142 }
143 assertEqualInt(ARCHIVE_OK,
144 archive_write_open_memory(a, buff, sizeof(buff), &used));
145
146 entry = archive_entry_new2(a);
147 /* Set a KOI8-R filename. */
148 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
149 archive_entry_set_filetype(entry, AE_IFREG);
150 archive_entry_set_size(entry, 0);
151 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
152 archive_entry_free(entry);
153 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
154
155 /* A bit 11 of general purpose flag should be 0x08,
156 * which indicates the filename charset is UTF-8. */
157 assertEqualInt(0x08, buff[7]);
158 /* Above three characters in KOI8-R should translate to the following
159 * three characters (two bytes each) in UTF-8. */
160 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
161
162 /*
163 * Verify that KOI8-R filenames are not translated to UTF-8.
164 */
165 a = archive_write_new();
166 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
167 assertEqualInt(ARCHIVE_OK,
168 archive_write_open_memory(a, buff, sizeof(buff), &used));
169
170 entry = archive_entry_new2(a);
171 /* Set a KOI8-R filename. */
172 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
173 archive_entry_set_filetype(entry, AE_IFREG);
174 archive_entry_set_size(entry, 0);
175 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
176 archive_entry_free(entry);
177 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
178
179 /* A bit 11 of general purpose flag should be 0,
180 * which indicates the filename charset is unknown. */
181 assertEqualInt(0, buff[7]);
182 /* Above three characters in KOI8-R should not translate to
183 * any character-set. */
184 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
185
186 /*
187 * Verify that A bit 11 of general purpose flag is not set
188 * when ASCII filenames are stored even if hdrcharset=UTF-8
189 * is specified.
190 */
191 a = archive_write_new();
192 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
193 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
194 skipping("This system cannot convert character-set"
195 " from KOI8-R to UTF-8.");
196 archive_write_free(a);
197 return;
198 }
199 assertEqualInt(ARCHIVE_OK,
200 archive_write_open_memory(a, buff, sizeof(buff), &used));
201
202 entry = archive_entry_new2(a);
203 /* Set an ASCII filename. */
204 archive_entry_set_pathname(entry, "abcABC");
205 archive_entry_set_filetype(entry, AE_IFREG);
206 archive_entry_set_size(entry, 0);
207 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
208 archive_entry_free(entry);
209 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
210
211 /* A bit 11 of general purpose flag should be 0,
212 * which indicates the filename charset is unknown. */
213 assertEqualInt(0, buff[7]);
214 assertEqualMem(buff + 30, "abcABC", 6);
215 }
216
/*
 * On non-Windows platforms, CP1251 filenames must not be translated
 * into CP866.
 */
DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)220 DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)
221 {
222 struct archive *a;
223 struct archive_entry *entry;
224 char buff[4096];
225 size_t used;
226
227 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
228 skipping("Russian_Russia locale not available on this system.");
229 return;
230 }
231
232 /*
233 * Verify that CP1251 filenames are not translated into any
234 * other character-set, in particular, CP866.
235 */
236 a = archive_write_new();
237 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
238 assertEqualInt(ARCHIVE_OK,
239 archive_write_open_memory(a, buff, sizeof(buff), &used));
240
241 entry = archive_entry_new2(a);
242 /* Set a CP1251 filename. */
243 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
244 archive_entry_set_filetype(entry, AE_IFREG);
245 archive_entry_set_size(entry, 0);
246 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
247 archive_entry_free(entry);
248 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
249
250 /* A bit 11 of general purpose flag should be 0,
251 * which indicates the filename charset is unknown. */
252 assertEqualInt(0, buff[7]);
253 /* Above three characters in CP1251 should not translate into
254 * any other character-set. */
255 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
256 }
257
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 and store the CP866 names in the zip file.
 * Verify that the same behavior works here.
 */
DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)263 DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)
264 {
265 struct archive *a;
266 struct archive_entry *entry;
267 char buff[4096];
268 size_t used;
269
270 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
271 skipping("Russian_Russia locale not available on this system.");
272 return;
273 }
274
275 /*
276 * Verify that Russian_Russia(CP1251) filenames are correctly translated
277 * to UTF-8.
278 */
279 a = archive_write_new();
280 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
281 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
282 skipping("This system cannot convert character-set"
283 " from Russian_Russia.CP1251 to UTF-8.");
284 archive_write_free(a);
285 return;
286 }
287 assertEqualInt(ARCHIVE_OK,
288 archive_write_open_memory(a, buff, sizeof(buff), &used));
289
290 entry = archive_entry_new2(a);
291 /* Set a CP1251 filename. */
292 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
293 archive_entry_set_filetype(entry, AE_IFREG);
294 archive_entry_set_size(entry, 0);
295 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
296 archive_entry_free(entry);
297 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
298
299 /* A bit 11 of general purpose flag should be 0x08,
300 * which indicates the filename charset is UTF-8. */
301 assertEqualInt(0x08, buff[7]);
302 /* Above three characters in CP1251 should translate to the following
303 * three characters (two bytes each) in UTF-8. */
304 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
305
306 /*
307 * Verify that Russian_Russia(CP1251) filenames are correctly translated
308 * to CP866.
309 */
310 a = archive_write_new();
311 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
312 assertEqualInt(ARCHIVE_OK,
313 archive_write_open_memory(a, buff, sizeof(buff), &used));
314
315 entry = archive_entry_new2(a);
316 /* Set a CP1251 filename. */
317 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
318 archive_entry_set_filetype(entry, AE_IFREG);
319 archive_entry_set_size(entry, 0);
320 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
321 archive_entry_free(entry);
322 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
323
324 /* A bit 11 of general purpose flag should be 0,
325 * which indicates the filename charset is unknown. */
326 assertEqualInt(0, buff[7]);
327 /* Above three characters in CP1251 should translate to the following
328 * three characters in CP866. */
329 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
330 }
331
DEFINE_TEST(test_zip_filename_encoding_EUCJP)332 DEFINE_TEST(test_zip_filename_encoding_EUCJP)
333 {
334 struct archive *a;
335 struct archive_entry *entry;
336 char buff[4096];
337 size_t used;
338
339 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
340 skipping("eucJP locale not available on this system.");
341 return;
342 }
343
344 /*
345 * Verify that EUC-JP filenames are correctly translated to UTF-8.
346 */
347 a = archive_write_new();
348 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
349 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
350 skipping("This system cannot convert character-set"
351 " from eucJP to UTF-8.");
352 archive_write_free(a);
353 return;
354 }
355 assertEqualInt(ARCHIVE_OK,
356 archive_write_open_memory(a, buff, sizeof(buff), &used));
357
358 entry = archive_entry_new2(a);
359 /* Set an EUC-JP filename. */
360 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
361 /* Check the Unicode version. */
362 archive_entry_set_filetype(entry, AE_IFREG);
363 archive_entry_set_size(entry, 0);
364 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
365 archive_entry_free(entry);
366 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
367
368 /* A bit 11 of general purpose flag should be 0x08,
369 * which indicates the filename charset is UTF-8. */
370 assertEqualInt(0x08, buff[7]);
371 /* Check UTF-8 version. */
372 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
373
374 /*
375 * Verify that EUC-JP filenames are not translated to UTF-8.
376 */
377 a = archive_write_new();
378 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
379 assertEqualInt(ARCHIVE_OK,
380 archive_write_open_memory(a, buff, sizeof(buff), &used));
381
382 entry = archive_entry_new2(a);
383 /* Set an EUC-JP filename. */
384 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
385 /* Check the Unicode version. */
386 archive_entry_set_filetype(entry, AE_IFREG);
387 archive_entry_set_size(entry, 0);
388 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
389 archive_entry_free(entry);
390 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
391
392 /* A bit 11 of general purpose flag should be 0,
393 * which indicates the filename charset is unknown. */
394 assertEqualInt(0, buff[7]);
395 /* Above three characters in EUC-JP should not translate to
396 * any character-set. */
397 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
398
399 /*
400 * Verify that A bit 11 of general purpose flag is not set
401 * when ASCII filenames are stored even if hdrcharset=UTF-8
402 * is specified.
403 */
404 a = archive_write_new();
405 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
406 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
407 skipping("This system cannot convert character-set"
408 " from eucJP to UTF-8.");
409 archive_write_free(a);
410 return;
411 }
412 assertEqualInt(ARCHIVE_OK,
413 archive_write_open_memory(a, buff, sizeof(buff), &used));
414
415 entry = archive_entry_new2(a);
416 /* Set an ASCII filename. */
417 archive_entry_set_pathname(entry, "abcABC");
418 /* Check the Unicode version. */
419 archive_entry_set_filetype(entry, AE_IFREG);
420 archive_entry_set_size(entry, 0);
421 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
422 archive_entry_free(entry);
423 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
424
425 /* A bit 11 of general purpose flag should be 0,
426 * which indicates the filename charset is unknown. */
427 assertEqualInt(0, buff[7]);
428 assertEqualMem(buff + 30, "abcABC", 6);
429 }
430
DEFINE_TEST(test_zip_filename_encoding_CP932)431 DEFINE_TEST(test_zip_filename_encoding_CP932)
432 {
433 struct archive *a;
434 struct archive_entry *entry;
435 char buff[4096];
436 size_t used;
437
438 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
439 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
440 skipping("CP932/SJIS locale not available on this system.");
441 return;
442 }
443
444 /*
445 * Verify that EUC-JP filenames are correctly translated to UTF-8.
446 */
447 a = archive_write_new();
448 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
449 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
450 skipping("This system cannot convert character-set"
451 " from CP932/SJIS to UTF-8.");
452 archive_write_free(a);
453 return;
454 }
455 assertEqualInt(ARCHIVE_OK,
456 archive_write_open_memory(a, buff, sizeof(buff), &used));
457
458 entry = archive_entry_new2(a);
459 /* Set a CP932/SJIS filename. */
460 archive_entry_set_pathname(entry, "\x95\x5C.txt");
461 /* Check the Unicode version. */
462 archive_entry_set_filetype(entry, AE_IFREG);
463 archive_entry_set_size(entry, 0);
464 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
465 archive_entry_free(entry);
466 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
467
468 /* A bit 11 of general purpose flag should be 0x08,
469 * which indicates the filename charset is UTF-8. */
470 assertEqualInt(0x08, buff[7]);
471 /* Check UTF-8 version. */
472 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
473
474 /*
475 * Verify that CP932/SJIS filenames are not translated to UTF-8.
476 */
477 a = archive_write_new();
478 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
479 assertEqualInt(ARCHIVE_OK,
480 archive_write_open_memory(a, buff, sizeof(buff), &used));
481
482 entry = archive_entry_new2(a);
483 /* Set a CP932/SJIS filename. */
484 archive_entry_set_pathname(entry, "\x95\x5C.txt");
485 /* Check the Unicode version. */
486 archive_entry_set_filetype(entry, AE_IFREG);
487 archive_entry_set_size(entry, 0);
488 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
489 archive_entry_free(entry);
490 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
491
492 /* A bit 11 of general purpose flag should be 0,
493 * which indicates the filename charset is unknown. */
494 assertEqualInt(0, buff[7]);
495 /* Above three characters in CP932/SJIS should not translate to
496 * any character-set. */
497 assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
498
499 /*
500 * Verify that A bit 11 of general purpose flag is not set
501 * when ASCII filenames are stored even if hdrcharset=UTF-8
502 * is specified.
503 */
504 a = archive_write_new();
505 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
506 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
507 skipping("This system cannot convert character-set"
508 " from CP932/SJIS to UTF-8.");
509 archive_write_free(a);
510 return;
511 }
512 assertEqualInt(ARCHIVE_OK,
513 archive_write_open_memory(a, buff, sizeof(buff), &used));
514
515 entry = archive_entry_new2(a);
516 /* Set an ASCII filename. */
517 archive_entry_set_pathname(entry, "abcABC");
518 /* Check the Unicode version. */
519 archive_entry_set_filetype(entry, AE_IFREG);
520 archive_entry_set_size(entry, 0);
521 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
522 archive_entry_free(entry);
523 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
524
525 /* A bit 11 of general purpose flag should be 0,
526 * which indicates the filename charset is unknown. */
527 assertEqualInt(0, buff[7]);
528 assertEqualMem(buff + 30, "abcABC", 6);
529 }
530
/*
 * Native-Windows-only test: names supplied as wide (UTF-16) strings via
 * the '_w' entry setters must come out as UTF-8 when hdrcharset=UTF-8 is
 * set.  On every other platform the test is skipped.
 */
DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	/*
	 * GP_FLAG_HI: output byte in which bit 11 of the general purpose
	 * flag shows up as 0x08.  NAME_OFF: where the filename is expected.
	 * LINK_TARGET_OFF: NAME_OFF plus the 8 bytes of "link.txt".
	 */
	enum { GP_FLAG_HI = 7, NAME_OFF = 30, LINK_TARGET_OFF = 38 };
	static const char utf8_file[] = "\xE8\xA1\xA8.txt";
	static const char utf8_dir[] = "\xE8\xA1\xA8/";
	struct archive *a;
	struct archive_entry *ae;
	char out[4096];
	size_t outlen;

	/*
	 * setlocale() is deliberately not called: the point is that the
	 * '_w' functions work as expected when 'hdrcharset' is UTF-8.
	 */

	/* Part 1: regular file named with a wide string. */
	a = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
	if (ARCHIVE_OK != archive_write_set_options(a, "hdrcharset=UTF-8")) {
		skipping("This system cannot convert character-set"
		    " from UTF-16 to UTF-8.");
		archive_write_free(a);
		return;
	}
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(a, out, sizeof(out), &outlen));

	ae = archive_entry_new2(a);
	archive_entry_copy_pathname_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFREG);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(a));

	/* Bit 11 set (0x08 in this byte) marks the filename as UTF-8. */
	assertEqualInt(0x08, out[GP_FLAG_HI]);
	assertEqualMem(out + NAME_OFF, utf8_file, sizeof(utf8_file) - 1);

	/* Part 2: directory named with a wide string. */
	a = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(a, out, sizeof(out), &outlen));

	ae = archive_entry_new2(a);
	/*
	 * The trailing slash is left off on purpose to exercise that code
	 * path; the stored name is expected to end in '/'.
	 */
	archive_entry_copy_pathname_w(ae, L"\u8868");
	archive_entry_set_filetype(ae, AE_IFDIR);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(a));

	assertEqualInt(0x08, out[GP_FLAG_HI]);
	assertEqualMem(out + NAME_OFF, utf8_dir, sizeof(utf8_dir) - 1);

	/* Part 3: symlink whose target is given as a wide string. */
	a = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(a, out, sizeof(out), &outlen));

	ae = archive_entry_new2(a);
	archive_entry_set_pathname(ae, "link.txt");
	archive_entry_copy_symlink_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFLNK);
	archive_entry_set_symlink_type(ae, AE_SYMLINK_TYPE_FILE);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(a));

	/* The flag stays clear because the entry name itself is ASCII. */
	assertEqualInt(0, out[GP_FLAG_HI]);
	/*
	 * The UTF-8 target is expected directly after the entry name
	 * (presumably written as the entry's data).
	 */
	assertEqualMem(out + LINK_TARGET_OFF, utf8_file, sizeof(utf8_file) - 1);

	/* NOTE: ZIP does not support hardlinks */
#else
	skipping("This test is meant to verify unicode string handling"
	    " on Windows with UTF-16 names");
	return;
#endif
}
625