1 /*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 #include "test.h" 26 27 #include <locale.h> 28 29 /* 30 * The sample tar file was made in LANG=KOI8-R and it contains two 31 * files the charset of which are different. 32 * - the filename of first file is stored in BINARY mode. 33 * - the filename of second file is stored in UTF-8. 34 * 35 * Whenever hdrcharset option is specified, we will correctly read the 36 * filename of second file, which is stored in UTF-8 by default. 37 */ 38 39 static void 40 test_read_format_tar_filename_KOI8R_CP866(const char *refname) 41 { 42 struct archive *a; 43 struct archive_entry *ae; 44 45 /* 46 * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option. 47 * We should correctly read two filenames. 48 */ 49 if (NULL == setlocale(LC_ALL, "Russian_Russia.866") && 50 NULL == setlocale(LC_ALL, "ru_RU.CP866")) { 51 skipping("ru_RU.CP866 locale not available on this system."); 52 return; 53 } 54 55 /* Test if the platform can convert from UTF-8. */ 56 assert((a = archive_read_new()) != NULL); 57 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 58 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 59 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 60 skipping("This system cannot convert character-set" 61 " from UTF-8 to CP866."); 62 return; 63 } 64 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 65 66 assert((a = archive_read_new()) != NULL); 67 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 68 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 69 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 70 skipping("This system cannot convert character-set" 71 " from KOI8-R to CP866."); 72 goto next_test; 73 } 74 assertEqualIntA(a, ARCHIVE_OK, 75 archive_read_open_filename(a, refname, 10240)); 76 77 /* Verify regular first file. */ 78 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 79 assertEqualString("\x8f\x90\x88\x82\x85\x92", 80 archive_entry_pathname(ae)); 81 assertEqualInt(6, archive_entry_size(ae)); 82 assertEqualInt(archive_entry_is_encrypted(ae), 0); 83 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 84 85 /* Verify regular second file. */ 86 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 87 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 88 archive_entry_pathname(ae)); 89 assertEqualInt(6, archive_entry_size(ae)); 90 assertEqualInt(archive_entry_is_encrypted(ae), 0); 91 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 92 93 94 /* End of archive. */ 95 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 96 97 /* Verify archive format. */ 98 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 99 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 100 archive_format(a)); 101 102 /* Close the archive. */ 103 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 104 next_test: 105 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 106 107 108 /* 109 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option. 110 * The filename we can properly read is only second file. 111 */ 112 113 assert((a = archive_read_new()) != NULL); 114 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 115 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 116 assertEqualIntA(a, ARCHIVE_OK, 117 archive_read_open_filename(a, refname, 10240)); 118 119 /* 120 * Verify regular first file. 121 * The filename is not translated to CP866 because hdrcharset 122 * attribute is BINARY and there is not way to know its charset. 123 */ 124 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 125 /* A filename is in KOI8-R. */ 126 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 127 archive_entry_pathname(ae)); 128 assertEqualInt(6, archive_entry_size(ae)); 129 assertEqualInt(archive_entry_is_encrypted(ae), 0); 130 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 131 132 /* 133 * Verify regular second file. 134 * The filename is translated from UTF-8 to CP866 135 */ 136 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 137 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 138 archive_entry_pathname(ae)); 139 assertEqualInt(6, archive_entry_size(ae)); 140 assertEqualInt(archive_entry_is_encrypted(ae), 0); 141 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 142 143 144 /* End of archive. */ 145 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 146 147 /* Verify archive format. */ 148 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 149 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 150 archive_format(a)); 151 152 /* Close the archive. */ 153 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 154 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 155 } 156 157 static void 158 test_read_format_tar_filename_KOI8R_UTF8(const char *refname) 159 { 160 struct archive *a; 161 struct archive_entry *ae; 162 163 /* 164 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option. 165 * We should correctly read two filenames. 166 */ 167 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 168 skipping("en_US.UTF-8 locale not available on this system."); 169 return; 170 } 171 172 assert((a = archive_read_new()) != NULL); 173 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 174 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 175 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 176 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 177 skipping("This system cannot convert character-set" 178 " from KOI8-R to UTF-8."); 179 return; 180 } 181 assertEqualIntA(a, ARCHIVE_OK, 182 archive_read_open_filename(a, refname, 10240)); 183 184 /* Verify regular file. */ 185 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 186 assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2", 187 archive_entry_pathname(ae)); 188 assertEqualInt(6, archive_entry_size(ae)); 189 assertEqualInt(archive_entry_is_encrypted(ae), 0); 190 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 191 192 /* Verify regular file. */ 193 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 194 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 195 archive_entry_pathname(ae)); 196 assertEqualInt(6, archive_entry_size(ae)); 197 assertEqualInt(archive_entry_is_encrypted(ae), 0); 198 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 199 200 /* Verify encryption status */ 201 assertEqualInt(archive_entry_is_encrypted(ae), 0); 202 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 203 204 /* End of archive. */ 205 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 206 207 /* Verify archive format. */ 208 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 209 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 210 archive_format(a)); 211 212 /* Verify encryption status */ 213 assertEqualInt(archive_entry_is_encrypted(ae), 0); 214 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 215 216 /* Close the archive. */ 217 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 218 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 219 220 /* 221 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option. 222 * The filename we can properly read is only second file. 223 */ 224 225 assert((a = archive_read_new()) != NULL); 226 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 227 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 228 assertEqualIntA(a, ARCHIVE_OK, 229 archive_read_open_filename(a, refname, 10240)); 230 231 /* 232 * Verify regular first file. 233 * The filename is not translated to UTF-8 because hdrcharset 234 * attribute is BINARY and there is not way to know its charset. 235 */ 236 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 237 /* A filename is in KOI8-R. */ 238 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 239 archive_entry_pathname(ae)); 240 assertEqualInt(6, archive_entry_size(ae)); 241 242 /* Verify encryption status */ 243 assertEqualInt(archive_entry_is_encrypted(ae), 0); 244 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 245 246 /* 247 * Verify regular second file. 248 */ 249 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 250 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 251 archive_entry_pathname(ae)); 252 assertEqualInt(6, archive_entry_size(ae)); 253 254 255 /* End of archive. */ 256 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 257 258 /* Verify archive format. */ 259 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 260 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 261 archive_format(a)); 262 263 /* Close the archive. */ 264 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 265 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 266 } 267 268 static void 269 test_read_format_tar_filename_KOI8R_CP1251(const char *refname) 270 { 271 struct archive *a; 272 struct archive_entry *ae; 273 274 /* 275 * Read filename in CP1251 with "hdrcharset=KOI8-R" option. 276 * We should correctly read two filenames. 277 */ 278 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 279 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 280 skipping("CP1251 locale not available on this system."); 281 return; 282 } 283 284 /* Test if the platform can convert from UTF-8. */ 285 assert((a = archive_read_new()) != NULL); 286 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 287 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 288 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 289 skipping("This system cannot convert character-set" 290 " from UTF-8 to CP1251."); 291 return; 292 } 293 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 294 295 assert((a = archive_read_new()) != NULL); 296 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 297 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 298 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 299 skipping("This system cannot convert character-set" 300 " from KOI8-R to CP1251."); 301 goto next_test; 302 } 303 assertEqualIntA(a, ARCHIVE_OK, 304 archive_read_open_filename(a, refname, 10240)); 305 306 /* Verify regular first file. */ 307 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 308 assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2", 309 archive_entry_pathname(ae)); 310 assertEqualInt(6, archive_entry_size(ae)); 311 assertEqualInt(archive_entry_is_encrypted(ae), 0); 312 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 313 314 /* Verify regular second file. */ 315 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 316 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 317 archive_entry_pathname(ae)); 318 assertEqualInt(6, archive_entry_size(ae)); 319 assertEqualInt(archive_entry_is_encrypted(ae), 0); 320 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 321 322 323 /* End of archive. */ 324 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 325 326 /* Verify archive format. */ 327 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 328 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 329 archive_format(a)); 330 331 /* Close the archive. */ 332 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 333 next_test: 334 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 335 336 /* 337 * Read filename in CP1251 without "hdrcharset=KOI8-R" option. 338 * The filename we can properly read is only second file. 339 */ 340 341 assert((a = archive_read_new()) != NULL); 342 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 343 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 344 assertEqualIntA(a, ARCHIVE_OK, 345 archive_read_open_filename(a, refname, 10240)); 346 347 /* 348 * Verify regular first file. 349 * The filename is not translated to CP1251 because hdrcharset 350 * attribute is BINARY and there is not way to know its charset. 351 */ 352 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 353 /* A filename is in KOI8-R. */ 354 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 355 archive_entry_pathname(ae)); 356 assertEqualInt(6, archive_entry_size(ae)); 357 assertEqualInt(archive_entry_is_encrypted(ae), 0); 358 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 359 360 /* 361 * Verify regular second file. 362 */ 363 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 364 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 365 archive_entry_pathname(ae)); 366 assertEqualInt(6, archive_entry_size(ae)); 367 assertEqualInt(archive_entry_is_encrypted(ae), 0); 368 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 369 370 371 /* End of archive. */ 372 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 373 374 /* Verify archive format. */ 375 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 376 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 377 archive_format(a)); 378 379 /* Close the archive. */ 380 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 381 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 382 } 383 384 385 DEFINE_TEST(test_read_format_tar_filename) 386 { 387 const char *refname = "test_read_format_tar_filename_koi8r.tar.Z"; 388 389 extract_reference_file(refname); 390 test_read_format_tar_filename_KOI8R_CP866(refname); 391 test_read_format_tar_filename_KOI8R_UTF8(refname); 392 test_read_format_tar_filename_KOI8R_CP1251(refname); 393 } 394