1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1999 by Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 26 27 /* 28 * For example, 29 * UCS -> UTF-8 -> IBM -> UTF-8 30 * (1) (2) (3) (4) 31 * tmp source result tmp 32 * output (1) (2) (3)line by line 33 * comparing (2) (4) 34 */ 35 36 #include <stdio.h> 37 #include <libgen.h> 38 #include <stdlib.h> 39 #include <unistd.h> 40 #include <locale.h> 41 #include <iconv.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <stdarg.h> 45 #include <sys/types.h> 46 #include <sys/wait.h> 47 48 char * ME; 49 int status; 50 int flag_display = 1; 51 int flag_bubun = 1; 52 53 54 void 55 usage(int status) 56 { 57 fprintf(stderr, "Usage: %s [-b] [-d] to-code\n", ME); 58 exit(status); 59 } 60 61 62 void 63 chkprint(char *format, ...) 64 { 65 va_list ap; 66 va_start(ap, format); 67 68 if (0 != flag_display) { 69 (void) vfprintf(stdout, format, ap); 70 } 71 va_end(ap); 72 } 73 74 75 void 76 validate(uint_t i, iconv_t cd, iconv_t cd2, iconv_t cd3) 77 { 78 char source_buf[1024]; 79 char result_buf[1024]; 80 char tmp_buf[1024]; 81 char * source; 82 char * result; 83 char * tmp; 84 size_t source_len; 85 size_t result_len; 86 size_t result_len2; 87 size_t tmp_len; 88 size_t s; 89 int j; 90 ushort_t *shortp; 91 uint_t *intp; 92 93 94 #define PREPARE_ILLEGALUTF8 \ 95 if (i == 0xfffe) { \ 96 source_buf[0] = 0xef; \ 97 source_buf[1] = 0xbf; \ 98 source_buf[2] = 0xbe; \ 99 source_buf[3] = 0x00; \ 100 source = source_buf; \ 101 source_len = 3; \ 102 chkprint("U+%04x\t** %x **", i, 0xefbfbe); \ 103 } else if (i == 0xffff) { \ 104 source_buf[0] = 0xef; \ 105 source_buf[1] = 0xbf; \ 106 source_buf[2] = 0xbf; \ 107 source_buf[3] = 0x00; \ 108 source = source_buf; \ 109 source_len = 3; \ 110 chkprint("U+%04x\t** %x **", i, 0xefbfbf); \ 111 } else if (i > 0x7fffffff) { \ 112 source_buf[0] = 0x0; \ 113 source_buf[1] = 0x0; \ 114 source_buf[2] = 0x0; \ 115 source_buf[3] = 0x0; \ 116 source_buf[4] = 0x0; \ 117 source_buf[5] = 0xfe; \ 118 source_buf[6] = 0x0; \ 119 source = source_buf; \ 120 source_len = 7; \ 121 chkprint("U+%04x\t** %x **", i, 0xfe); \ 122 } 123 124 #define DATASIZE 4 125 /* 126 shortp = (ushort_t*)&tmp_buf[0]; 127 *shortp = 0xfeff; 128 shortp = (ushort_t*)&tmp_buf[2]; 129 *shortp = i; 130 */ 131 /* chkprint("U+"); */ \ 132 /* for( j = 0; j < tmp_len ; j++) */ \ 133 /* chkprint("%02x", (uchar_t)tmp[j]); */ \ 134 /* 135 shortp = (ushort_t*)&tmp_buf[0]; \ 136 *shortp = i; \ 137 */ 138 139 #define PREPAREUTF8 \ 140 tmp = tmp_buf; \ 141 tmp_len = DATASIZE; \ 142 intp = (uint_t*)&tmp_buf[0]; \ 143 *intp = i; \ 144 source = source_buf; \ 145 source_len = sizeof (source_buf); \ 146 \ 147 chkprint("U+%04x", i); \ 148 s = iconv(cd2, (const char**)&tmp, &tmp_len, &source, &source_len); \ 149 if (s != 0) { \ 150 chkprint(" \n stopped \n"); \ 151 fprintf(stderr, "fail to convert Unicode to UTF-8\n"); \ 152 exit (status); \ 153 } \ 154 chkprint("\t0x"); \ 155 for( j = 0; j < sizeof (source_buf) - source_len; j++) \ 156 chkprint("%02x", (uchar_t)source_buf[j]); \ 157 source_len = sizeof (source_buf) - source_len; \ 158 source = &source_buf[0]; 159 160 #define COMPARE_ERROR \ 161 chkprint("\t-> 0x");\ 162 for (j = 0; j < sizeof (tmp_buf) - tmp_len; j++) { \ 163 chkprint("%02x", (uchar_t)tmp_buf[j]);\ 164 } \ 165 chkprint("\n warning \n"); \ 166 fprintf(stderr, " Converting answer is not the same for (U+%04x)\n", \ 167 i); 168 169 #define COMPARE \ 170 tmp = tmp_buf; \ 171 tmp_len = sizeof (tmp_buf); \ 172 result = result_buf; \ 173 result_len2 = sizeof (result_buf) - result_len; \ 174 s = iconv(cd3, (const char**)&result, &result_len2, &tmp, &tmp_len); \ 175 if (s != 0) { \ 176 chkprint(" \n WARNING \n"); \ 177 fprintf(stderr, "fail to convert Orignal Codeset to UTF-8\n",\ 178 i); \ 179 fprintf(stderr, "errno=%d %d %d\n", \ 180 errno, \ 181 sizeof (result_buf) - result_len - result_len2, \ 182 result - result_buf); \ 183 exit (status); \ 184 } \ 185 chkprint("\t"); \ 186 if (sizeof (tmp_buf) - tmp_len != source_len) { \ 187 COMPARE_ERROR \ 188 } else { \ 189 for (j = 0; j < source_len; j++) { \ 190 if ((uchar_t)tmp_buf[j] != (uchar_t)source_buf[j]) { \ 191 COMPARE_ERROR \ 192 } \ 193 }\ 194 } 195 196 197 /* 198 * LOGIC START 199 */ 200 201 if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) { 202 PREPARE_ILLEGALUTF8 203 } else { 204 PREPAREUTF8 205 } 206 207 result = result_buf; 208 result_len = sizeof (result_buf); 209 tmp_len = source_len; /* save to compare source data */ 210 s = iconv(cd, (const char**)&source, &source_len, &result, 211 &result_len); 212 213 status = 1; 214 if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) { 215 if ((((size_t)0) == s) || 216 (errno != EILSEQ)) { 217 fprintf(stderr, "EILSEQ expected for 0x%x: %d %d %d\n", 218 i, 219 errno, 220 source_len, 221 source - source_buf); 222 } 223 } 224 if (((size_t)(0)) == s) { 225 if ((source_len != 0) || 226 ((source - source_buf) != tmp_len) || 227 ((result - result_buf + result_len) != 228 sizeof (result_buf))) { 229 fprintf(stderr, ": %d %d %d\n", 230 errno, 231 source_len, 232 source - source_buf); 233 exit(status); 234 } 235 chkprint("\t0x"); 236 for( j = 0; j < sizeof (result_buf) - result_len ; j++) 237 chkprint("%02x", (uchar_t)result_buf[j]); 238 source_len = tmp_len; 239 COMPARE 240 chkprint("\n"); 241 return; 242 } 243 244 status += 1; 245 if (((size_t)(-1)) == s) { 246 if (errno == EILSEQ) { 247 if (((source - source_buf) != 248 (tmp_len - source_len)) || 249 ((result - result_buf + result_len) != 250 sizeof (result_buf))) { 251 fprintf(stderr, ": %d %d %d\n", 252 errno, 253 source_len, 254 source - source_buf); 255 exit(status); 256 } 257 chkprint("\tEILSEQ\n", i); 258 return; 259 } 260 fprintf(stderr, "Error for source U+%04x: %d %d %d %d %d\n", 261 i, 262 errno, 263 (DATASIZE) - source_len, /* not converted size */ 264 source - source_buf, 265 (sizeof (result_buf)) - result_len, 266 result - result_buf); 267 exit(status); 268 } 269 270 status += 1; 271 exit(status); 272 } 273 274 main(int argc, char ** argv) 275 { 276 int r; 277 char * p; 278 iconv_t cd; 279 iconv_t cd2; 280 iconv_t cd3; 281 uint_t i, j, k; 282 283 ME = basename(argv[0]); 284 setlocale(LC_ALL, ""); 285 status = 100; 286 287 288 for (j = 1; j < argc; j++) { 289 if (argv[j][0] != '-') 290 break; 291 for (k = 1; ; k++) { 292 if (argv[j][k] == '\0') 293 break; 294 if (argv[j][k] == 'b') { 295 flag_bubun = 0; 296 continue; 297 } 298 if (argv[j][k] == 'd') { 299 flag_display = 0; 300 continue; 301 } 302 } 303 } 304 if (j >= argc) usage(-1); 305 306 chkprint( "#UCS-4\tUTF-8\t* %s *\n", argv[j]); 307 308 cd = iconv_open( argv[j], "UTF-8"); /* to, from */ 309 if (((iconv_t)(-1)) == cd) { 310 perror("iconv_open"); 311 exit(status); 312 } 313 314 cd2 = iconv_open("UTF-8", "UCS-4"); 315 if (((iconv_t)(-1)) == cd2) { 316 perror("iconv_open for UTF-8"); 317 exit(status); 318 } 319 320 cd3 = iconv_open("UTF-8", argv[j]); 321 if (((iconv_t)(-1)) == cd3) { 322 perror("iconv_open for reverse"); 323 exit(status); 324 } 325 326 327 /* 328 * main logic 329 */ 330 if (flag_bubun) { 331 for (i = 0; i <= 0xff; i++) 332 validate(i, cd, cd2, cd3); 333 validate(0x100, cd, cd2, cd3); 334 validate(0x3ff, cd, cd2, cd3); 335 validate(0x400, cd, cd2, cd3); 336 validate(0xfff, cd, cd2, cd3); 337 validate(0x1000, cd, cd2, cd3); 338 validate(0x3fff, cd, cd2, cd3); 339 validate(0x4000, cd, cd2, cd3); 340 validate(0xfffd, cd, cd2, cd3); 341 validate(0xfffe, cd, cd2, cd3); /* error */ 342 validate(0xffff, cd, cd2, cd3); /* error */ 343 validate(0x10000, cd, cd2, cd3); 344 validate(0x3ffff, cd, cd2, cd3); 345 validate(0x40000, cd, cd2, cd3); 346 validate(0xfffff, cd, cd2, cd3); 347 validate(0x100000, cd, cd2, cd3); 348 validate(0x1fffff, cd, cd2, cd3); 349 validate(0x200000, cd, cd2, cd3); 350 validate(0x3fffff, cd, cd2, cd3); 351 validate(0x400000, cd, cd2, cd3); 352 validate(0xffffff, cd, cd2, cd3); 353 validate(0x1000000, cd, cd2, cd3); 354 validate(0x3ffffff, cd, cd2, cd3); 355 validate(0x4000000, cd, cd2, cd3); 356 validate(0xfffffff, cd, cd2, cd3); 357 validate(0x10000000, cd, cd2, cd3); 358 validate(0x7fffffff, cd, cd2, cd3); 359 validate(0x80000000, cd, cd2, cd3); /* error */ 360 } else { 361 int k; 362 for (i = 0, k = 0; i <= 0x80000000; i++, k++) { 363 validate(i, cd, cd2, cd3); 364 if ((k == 0x1000000) && 365 (0 == flag_display)) { 366 printf(" i < 0x%x: checked\n", i); 367 k = 0; 368 } 369 370 } 371 } 372 373 status = 200; 374 r = iconv_close(cd); 375 if (-1 == r) { 376 perror("iconv_close"); 377 exit(status); 378 } 379 380 r = iconv_close(cd2); 381 if (-1 == r) { 382 perror("iconv_close for UTF-8"); 383 exit(status); 384 } 385 386 r = iconv_close(cd3); 387 if (-1 == r) { 388 perror("iconv_close for reverse"); 389 exit(status); 390 } 391 392 return (0); 393 } 394