1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/debug.h> 34 #include <sys/kmem.h> 35 #include <sys/sunddi.h> 36 #include <sys/byteorder.h> 37 #include <sys/errno.h> 38 #include <sys/euc.h> 39 #include <sys/modctl.h> 40 #include <sys/kiconv.h> 41 42 #include <sys/kiconv_ja.h> 43 #include <sys/kiconv_ja_jis_to_unicode.h> 44 #include <sys/kiconv_ja_unicode_to_jis.h> 45 46 /* 47 * The following vector shows remaining bytes in a UTF-8 character. 48 * Index will be the first byte of the character. This is defined in 49 * u8_textprep.c. 50 */ 51 extern const int8_t u8_number_of_bytes[]; 52 53 /* 54 * The following is a vector of bit-masks to get used bits in 55 * the first byte of a UTF-8 character. Index is remaining bytes at above of 56 * the character. This is defined in uconv.c. 57 */ 58 extern const uchar_t u8_masks_tbl[]; 59 60 /* 61 * The following two vectors are to provide valid minimum and 62 * maximum values for the 2'nd byte of a multibyte UTF-8 character for 63 * better illegal sequence checking. The index value must be the value of 64 * the first byte of the UTF-8 character. These are defined in u8_textprep.c. 65 */ 66 extern const uint8_t u8_valid_min_2nd_byte[]; 67 extern const uint8_t u8_valid_max_2nd_byte[]; 68 69 static kiconv_ja_euc16_t 70 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2) 71 { 72 const kiconv_ja_euc16_t *p; 73 74 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL) 75 return (p[ucs2 & 0xff]); 76 77 return (KICONV_JA_NODEST); 78 } 79 80 static size_t 81 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno) 82 { 83 uint_t l; /* to be copied to *p on successful return */ 84 uchar_t ic; /* current byte */ 85 uchar_t ic1; /* 1st byte */ 86 uchar_t *ip = *pip; /* next byte to read */ 87 size_t ileft = *pileft; /* number of bytes available */ 88 size_t rv = 0; /* return value of this function */ 89 int remaining_bytes; 90 int u8_size; 91 92 KICONV_JA_NGET(ic1); /* read 1st byte */ 93 94 if (ic1 < 0x80) { 95 /* successfully converted */ 96 *p = (uint_t)ic1; 97 goto ret; 98 } 99 100 u8_size = u8_number_of_bytes[ic1]; 101 if (u8_size == U8_ILLEGAL_CHAR) { 102 KICONV_JA_RETERROR(EILSEQ) 103 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) { 104 KICONV_JA_RETERROR(ERANGE) 105 } 106 107 remaining_bytes = u8_size - 1; 108 if (remaining_bytes != 0) { 109 l = ic1 & u8_masks_tbl[remaining_bytes]; 110 111 for (; remaining_bytes > 0; remaining_bytes--) { 112 KICONV_JA_NGET(ic); 113 if (ic1 != 0U) { 114 if ((ic < u8_valid_min_2nd_byte[ic1]) || 115 (ic > u8_valid_max_2nd_byte[ic1])) { 116 KICONV_JA_RETERROR(EILSEQ) 117 } 118 ic1 = 0U; /* 2nd byte check done */ 119 } else { 120 if ((ic < 0x80) || (ic > 0xbf)) { 121 KICONV_JA_RETERROR(EILSEQ) 122 } 123 } 124 l = (l << 6) | (ic & 0x3f); 125 } 126 127 /* successfully converted */ 128 *p = l; 129 } else { 130 KICONV_JA_RETERROR(EILSEQ) 131 } 132 133 ret: 134 if (rv == 0) { 135 /* 136 * Update rv, *pip, and *pileft on successfule return. 137 */ 138 rv = *pileft - ileft; 139 *pip = ip; 140 *pileft = ileft; 141 } 142 143 return (rv); 144 } 145 146 static size_t 147 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum) 148 { 149 uint_t l; /* to be copied to *p on successful return */ 150 uchar_t ic; /* current byte */ 151 uchar_t ic1; /* 1st byte */ 152 uchar_t *ip = *pip; /* next byte to read */ 153 size_t ileft = *pileft; /* number of bytes available */ 154 size_t rv = 0; /* return value of this function */ 155 int remaining_bytes; 156 int u8_size; 157 158 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */ 159 160 if (ic1 < 0x80) { 161 /* successfully converted */ 162 l = (uint_t)ic1; 163 goto ret; 164 } 165 166 u8_size = u8_number_of_bytes[ic1]; 167 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) { 168 l = KICONV_JA_DEF_SINGLE; 169 (*repnum)++; 170 goto ret; 171 } 172 173 remaining_bytes = u8_size - 1; 174 175 if (remaining_bytes != 0) { 176 l = ic1 & u8_masks_tbl[remaining_bytes]; 177 178 for (; remaining_bytes > 0; remaining_bytes--) { 179 KICONV_JA_NGET_REP_TO_MB(ic); 180 if (ic1 != 0U) { 181 if ((ic < u8_valid_min_2nd_byte[ic1]) || 182 (ic > u8_valid_max_2nd_byte[ic1])) { 183 l = KICONV_JA_DEF_SINGLE; 184 (*repnum)++; 185 ileft -= (remaining_bytes - 1); 186 ip += (remaining_bytes - 1); 187 break; 188 } 189 ic1 = 0U; /* 2nd byte check done */ 190 } else { 191 if ((ic < 0x80) || (ic > 0xbf)) { 192 l = KICONV_JA_DEF_SINGLE; 193 (*repnum)++; 194 ileft -= (remaining_bytes - 1); 195 ip += (remaining_bytes - 1); 196 break; 197 } 198 } 199 l = (l << 6) | (ic & 0x3f); 200 } 201 } else { 202 l = KICONV_JA_DEF_SINGLE; 203 (*repnum)++; 204 } 205 206 ret: 207 /* successfully converted */ 208 *p = l; 209 rv = *pileft - ileft; 210 211 *pip = ip; 212 *pileft = ileft; 213 214 return (rv); 215 } 216 217 static size_t /* return #bytes read, or -1 */ 218 read_unicode( 219 uint_t *p, /* point variable to store UTF-32 */ 220 uchar_t **pip, /* point pointer to input buf */ 221 size_t *pileft, /* point #bytes left in input buf */ 222 int *errno, /* point variable to errno */ 223 int flag, /* kiconvstr flag */ 224 size_t *rv) /* point return valuse */ 225 { 226 if (flag & KICONV_REPLACE_INVALID) 227 return (utf8_ucs_replace(p, pip, pileft, rv)); 228 else 229 return (utf8_ucs(p, pip, pileft, errno)); 230 } 231 232 static size_t 233 write_unicode( 234 uint_t u32, /* UTF-32 to write */ 235 char **pop, /* point pointer to output buf */ 236 size_t *poleft, /* point #bytes left in output buf */ 237 int *errno) /* point variable to errno */ 238 { 239 char *op = *pop; 240 size_t oleft = *poleft; 241 size_t rv = 0; /* return value */ 242 243 if (u32 <= 0x7f) { 244 KICONV_JA_NPUT((uchar_t)(u32)); 245 rv = 1; 246 } else if (u32 <= 0x7ff) { 247 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0)); 248 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 249 rv = 2; 250 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) { 251 KICONV_JA_RETERROR(EILSEQ) 252 } else if (u32 <= 0xffff) { 253 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0)); 254 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80)); 255 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 256 rv = 3; 257 } else if (u32 <= 0x10ffff) { 258 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0)); 259 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80)); 260 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80)); 261 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 262 rv = 4; 263 } else { 264 KICONV_JA_RETERROR(EILSEQ) 265 } 266 267 ret: 268 if (rv != (size_t)-1) { 269 /* update *pop and *poleft only on successful return */ 270 *pop = op; 271 *poleft = oleft; 272 } 273 274 return (rv); 275 } 276 277 static void * 278 _kiconv_ja_open_unicode(uint8_t id) 279 { 280 kiconv_state_t kcd; 281 282 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), 283 KM_SLEEP); 284 kcd->id = id; 285 kcd->bom_processed = 0; 286 return ((void *)kcd); 287 } 288 289 static void * 290 open_eucjp(void) 291 { 292 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP)); 293 } 294 295 static void * 296 open_eucjpms(void) 297 { 298 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS)); 299 } 300 301 static void * 302 open_sjis(void) 303 { 304 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS)); 305 } 306 307 static void * 308 open_cp932(void) 309 { 310 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932)); 311 } 312 313 int 314 close_ja(void *kcd) 315 { 316 if (! kcd || kcd == (void *)-1) 317 return (EBADF); 318 319 kmem_free(kcd, sizeof (kiconv_state_data_t)); 320 321 return (0); 322 } 323 324 static size_t 325 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 326 char **outbuf, size_t *outbytesleft, int *errno) 327 { 328 uint_t u32; /* UTF-32 */ 329 uint_t index; /* index for table lookup */ 330 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */ 331 size_t rv = 0; /* return value of this function */ 332 333 uchar_t *ip; 334 size_t ileft; 335 char *op; 336 size_t oleft; 337 size_t id = ((kiconv_state_t)kcd)->id; 338 339 if ((inbuf == NULL) || (*inbuf == NULL)) { 340 return (0); 341 } 342 343 ip = (uchar_t *)*inbuf; 344 ileft = *inbytesleft; 345 op = *outbuf; 346 oleft = *outbytesleft; 347 348 while (ileft != 0) { 349 KICONV_JA_NGET(ic1); /* get 1st byte */ 350 351 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */ 352 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 353 KICONV_JA_PUTU(u32); 354 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */ 355 KICONV_JA_NGET(ic2); 356 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */ 357 ic1 &= KICONV_JA_CMASK; 358 ic2 &= KICONV_JA_CMASK; 359 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2); 360 if (u32 == KICONV_JA_NODEST) { 361 index = (ic1 - 0x21) * 94 + ic2 - 0x21; 362 u32 = kiconv_ja_jisx0208_to_ucs2[index]; 363 } 364 if (u32 == KICONV_JA_REPLACE) 365 rv++; 366 KICONV_JA_PUTU(u32); 367 } else { /* 2nd byte check failed */ 368 KICONV_JA_RETERROR(EILSEQ) 369 } 370 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */ 371 KICONV_JA_NGET(ic2); 372 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */ 373 index = (ic2 - 0xa1); 374 u32 = kiconv_ja_jisx0201kana_to_ucs2[index]; 375 KICONV_JA_PUTU(u32); 376 } else { /* 2nd byte check failed */ 377 KICONV_JA_RETERROR(EILSEQ) 378 } 379 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */ 380 KICONV_JA_NGET(ic2); 381 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */ 382 KICONV_JA_NGET(ic3); 383 if (KICONV_JA_ISCS3(ic3)) { 384 /* 3rd byte check passed */ 385 ic2 &= KICONV_JA_CMASK; 386 ic3 &= KICONV_JA_CMASK; 387 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32, 388 ic2, ic3); 389 if (u32 == KICONV_JA_NODEST) { 390 index = ((ic2 - 0x21) * 94 + 391 (ic3 - 0x21)); 392 u32 = kiconv_ja_jisx0212_to_ucs2 393 [index]; 394 } 395 if (u32 == KICONV_JA_REPLACE) 396 rv++; 397 KICONV_JA_PUTU(u32); 398 } else { /* 3rd byte check failed */ 399 KICONV_JA_RETERROR(EILSEQ) 400 } 401 } else { /* 2nd byte check failed */ 402 KICONV_JA_RETERROR(EILSEQ) 403 } 404 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) { 405 /* C1 control; 1 byte */ 406 u32 = ic1; 407 KICONV_JA_PUTU(u32); 408 } else { /* 1st byte check failed */ 409 KICONV_JA_RETERROR(EILSEQ) 410 } 411 412 /* 413 * One character successfully converted so update 414 * values outside of this function's stack. 415 */ 416 *inbuf = (char *)ip; 417 *inbytesleft = ileft; 418 *outbuf = op; 419 *outbytesleft = oleft; 420 } 421 422 ret: 423 return (rv); 424 } 425 426 static size_t 427 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 428 char **outbuf, size_t *outbytesleft, int *errno) 429 { 430 uchar_t ic; 431 size_t rv = 0; 432 uint_t ucs4; 433 ushort_t euc16; 434 435 uchar_t *ip; 436 size_t ileft; 437 char *op; 438 size_t oleft; 439 size_t read_len; 440 441 size_t id = ((kiconv_state_t)kcd)->id; 442 443 if ((inbuf == NULL) || (*inbuf == NULL)) { 444 return (0); 445 } 446 447 ip = (uchar_t *)*inbuf; 448 ileft = *inbytesleft; 449 op = *outbuf; 450 oleft = *outbytesleft; 451 452 KICONV_JA_CHECK_UTF8_BOM(ip, ileft); 453 454 while (ileft != 0) { 455 KICONV_JA_GETU(&ucs4, 0); 456 457 if (ucs4 > 0xffff) { 458 /* non-BMP */ 459 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 460 rv++; 461 goto next; 462 } 463 464 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 465 if (euc16 == KICONV_JA_NODEST) { 466 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 467 } 468 if (euc16 == KICONV_JA_NODEST) { 469 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 470 rv++; 471 goto next; 472 } 473 474 switch (euc16 & 0x8080) { 475 case 0x0000: /* CS0 */ 476 ic = (uchar_t)euc16; 477 KICONV_JA_NPUT(ic); 478 break; 479 case 0x8080: /* CS1 */ 480 ic = (uchar_t)((euc16 >> 8) & 0xff); 481 KICONV_JA_NPUT(ic); 482 ic = (uchar_t)(euc16 & 0xff); 483 KICONV_JA_NPUT(ic); 484 break; 485 case 0x0080: /* CS2 */ 486 KICONV_JA_NPUT(SS2); 487 ic = (uchar_t)euc16; 488 KICONV_JA_NPUT(ic); 489 break; 490 case 0x8000: /* CS3 */ 491 KICONV_JA_NPUT(SS3); 492 ic = (uchar_t)((euc16 >> 8) & 0xff); 493 KICONV_JA_NPUT(ic); 494 ic = (uchar_t)(euc16 & KICONV_JA_CMASK); 495 KICONV_JA_NPUT(ic | KICONV_JA_CMSB); 496 break; 497 } 498 next: 499 /* 500 * One character successfully converted so update 501 * values outside of this function's stack. 502 */ 503 *inbuf = (char *)ip; 504 *inbytesleft = ileft; 505 *outbuf = op; 506 *outbytesleft = oleft; 507 } 508 509 ret: 510 return (rv); 511 } 512 513 static size_t 514 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 515 size_t *outbytesleft, int flag, int *errno, uint8_t id) 516 { 517 uint_t u32; /* UTF-32 */ 518 uint_t index; /* index for table lookup */ 519 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */ 520 size_t rv = 0; /* return value of this function */ 521 522 uchar_t *ip; 523 size_t ileft; 524 char *op; 525 size_t oleft; 526 527 boolean_t do_not_ignore_null; 528 529 if ((inbuf == NULL) || (*inbuf == NULL)) { 530 return (0); 531 } 532 533 ip = (uchar_t *)inbuf; 534 ileft = *inbytesleft; 535 op = outbuf; 536 oleft = *outbytesleft; 537 538 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 539 540 while (ileft != 0) { 541 KICONV_JA_NGET(ic1); /* get 1st byte */ 542 543 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */ 544 if (ic1 == '\0' && do_not_ignore_null) { 545 return (0); 546 } 547 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 548 KICONV_JA_PUTU(u32); 549 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */ 550 if (flag & KICONV_REPLACE_INVALID) { 551 KICONV_JA_NGET_REP_FR_MB(ic2); 552 } else { 553 KICONV_JA_NGET(ic2); 554 } 555 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */ 556 ic1 &= KICONV_JA_CMASK; 557 ic2 &= KICONV_JA_CMASK; 558 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2); 559 if (u32 == KICONV_JA_NODEST) { 560 index = (ic1 - 0x21) * 94 + ic2 - 0x21; 561 u32 = kiconv_ja_jisx0208_to_ucs2[index]; 562 } 563 if (u32 == KICONV_JA_REPLACE) 564 rv++; 565 KICONV_JA_PUTU(u32); 566 } else { /* 2nd byte check failed */ 567 if (flag & KICONV_REPLACE_INVALID) { 568 KICONV_JA_PUTU(KICONV_JA_REPLACE); 569 rv++; 570 } else { 571 KICONV_JA_RETERROR(EILSEQ) 572 } 573 } 574 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */ 575 if (flag & KICONV_REPLACE_INVALID) { 576 KICONV_JA_NGET_REP_FR_MB(ic2); 577 } else { 578 KICONV_JA_NGET(ic2); 579 } 580 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */ 581 index = (ic2 - 0xa1); 582 u32 = kiconv_ja_jisx0201kana_to_ucs2[index]; 583 KICONV_JA_PUTU(u32); 584 } else { /* 2nd byte check failed */ 585 if (flag & KICONV_REPLACE_INVALID) { 586 KICONV_JA_PUTU(KICONV_JA_REPLACE); 587 rv++; 588 } else { 589 KICONV_JA_RETERROR(EILSEQ) 590 } 591 } 592 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */ 593 if (flag & KICONV_REPLACE_INVALID) { 594 KICONV_JA_NGET_REP_FR_MB(ic2); 595 } else { 596 KICONV_JA_NGET(ic2); 597 } 598 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */ 599 if (flag & KICONV_REPLACE_INVALID) { 600 KICONV_JA_NGET_REP_FR_MB(ic3); 601 } else { 602 KICONV_JA_NGET(ic3); 603 } 604 if (KICONV_JA_ISCS3(ic3)) { 605 /* 3rd byte check passed */ 606 ic2 &= KICONV_JA_CMASK; 607 ic3 &= KICONV_JA_CMASK; 608 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32, 609 ic2, ic3); 610 if (u32 == KICONV_JA_NODEST) { 611 index = ((ic2 - 0x21) * 94 + 612 (ic3 - 0x21)); 613 u32 = kiconv_ja_jisx0212_to_ucs2 614 [index]; 615 } 616 if (u32 == KICONV_JA_REPLACE) 617 rv++; 618 KICONV_JA_PUTU(u32); 619 } else { /* 3rd byte check failed */ 620 if (flag & KICONV_REPLACE_INVALID) { 621 KICONV_JA_PUTU( 622 KICONV_JA_REPLACE); 623 rv++; 624 } else { 625 KICONV_JA_RETERROR(EILSEQ) 626 } 627 } 628 } else { /* 2nd byte check failed */ 629 if (flag & KICONV_REPLACE_INVALID) { 630 KICONV_JA_PUTU(KICONV_JA_REPLACE); 631 rv++; 632 } else { 633 KICONV_JA_RETERROR(EILSEQ) 634 } 635 } 636 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) { 637 /* C1 control; 1 byte */ 638 u32 = ic1; 639 KICONV_JA_PUTU(u32); 640 } else { /* 1st byte check failed */ 641 if (flag & KICONV_REPLACE_INVALID) { 642 KICONV_JA_PUTU(KICONV_JA_REPLACE); 643 rv++; 644 } else { 645 KICONV_JA_RETERROR(EILSEQ) 646 } 647 } 648 649 next: 650 /* 651 * One character successfully converted so update 652 * values outside of this function's stack. 653 */ 654 *inbytesleft = ileft; 655 *outbytesleft = oleft; 656 } 657 658 ret: 659 return (rv); 660 } 661 662 static size_t 663 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 664 size_t *outbytesleft, int flag, int *errno, uint8_t id) 665 { 666 uchar_t ic; 667 size_t rv = 0; 668 uint_t ucs4; 669 ushort_t euc16; 670 671 uchar_t *ip; 672 size_t ileft; 673 char *op; 674 size_t oleft; 675 size_t read_len; 676 677 boolean_t do_not_ignore_null; 678 679 if ((inbuf == NULL) || (*inbuf == NULL)) { 680 return (0); 681 } 682 683 ip = (uchar_t *)inbuf; 684 ileft = *inbytesleft; 685 op = outbuf; 686 oleft = *outbytesleft; 687 688 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft); 689 690 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 691 692 while (ileft != 0) { 693 KICONV_JA_GETU(&ucs4, flag); 694 695 if (ucs4 == 0x0 && do_not_ignore_null) { 696 return (0); 697 } 698 699 if (ucs4 > 0xffff) { 700 /* non-BMP */ 701 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 702 rv++; 703 goto next; 704 } 705 706 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 707 if (euc16 == KICONV_JA_NODEST) { 708 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 709 } 710 if (euc16 == KICONV_JA_NODEST) { 711 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 712 rv++; 713 goto next; 714 } 715 716 switch (euc16 & 0x8080) { 717 case 0x0000: /* CS0 */ 718 ic = (uchar_t)euc16; 719 KICONV_JA_NPUT(ic); 720 break; 721 case 0x8080: /* CS1 */ 722 ic = (uchar_t)((euc16 >> 8) & 0xff); 723 KICONV_JA_NPUT(ic); 724 ic = (uchar_t)(euc16 & 0xff); 725 KICONV_JA_NPUT(ic); 726 break; 727 case 0x0080: /* CS2 */ 728 KICONV_JA_NPUT(SS2); 729 ic = (uchar_t)euc16; 730 KICONV_JA_NPUT(ic); 731 break; 732 case 0x8000: /* CS3 */ 733 KICONV_JA_NPUT(SS3); 734 ic = (uchar_t)((euc16 >> 8) & 0xff); 735 KICONV_JA_NPUT(ic); 736 ic = (uchar_t)(euc16 & KICONV_JA_CMASK); 737 KICONV_JA_NPUT(ic | KICONV_JA_CMSB); 738 break; 739 } 740 next: 741 /* 742 * One character successfully converted so update 743 * values outside of this function's stack. 744 */ 745 *inbytesleft = ileft; 746 *outbytesleft = oleft; 747 } 748 749 ret: 750 return (rv); 751 } 752 753 static size_t 754 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 755 char **outbuf, size_t *outbytesleft, int *errno) 756 { 757 if (! kcd || kcd == (void *)-1) { 758 *errno = EBADF; 759 return ((size_t)-1); 760 } 761 762 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft, 763 outbuf, outbytesleft, errno)); 764 } 765 766 static size_t 767 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 768 char **outbuf, size_t *outbytesleft, int *errno) 769 { 770 if (! kcd || kcd == (void *)-1) { 771 *errno = EBADF; 772 return ((size_t)-1); 773 } 774 775 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft, 776 outbuf, outbytesleft, errno)); 777 } 778 779 static size_t 780 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 781 size_t *outbytesleft, int flag, int *errno) 782 { 783 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf, 784 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP)); 785 } 786 787 static size_t 788 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 789 size_t *outbytesleft, int flag, int *errno) 790 { 791 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf, 792 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP)); 793 } 794 795 static size_t 796 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf, 797 size_t *outbytesleft, int flag, int *errno) 798 { 799 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf, 800 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS)); 801 } 802 803 static size_t 804 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf, 805 size_t *outbytesleft, int flag, int *errno) 806 { 807 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf, 808 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS)); 809 } 810 811 static size_t 812 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 813 char **outbuf, size_t *outbytesleft, int *errno) 814 { 815 uint_t uni; /* UTF-32 */ 816 uint_t index; /* index for table lookup */ 817 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */ 818 size_t rv = 0; /* return value of this function */ 819 820 uchar_t *ip; 821 size_t ileft; 822 char *op; 823 size_t oleft; 824 size_t id = ((kiconv_state_t)kcd)->id; 825 826 if ((inbuf == NULL) || (*inbuf == NULL)) { 827 return (0); 828 } 829 830 ip = (uchar_t *)*inbuf; 831 ileft = *inbytesleft; 832 op = *outbuf; 833 oleft = *outbytesleft; 834 835 while (ileft != 0) { 836 KICONV_JA_NGET(ic1); /* get 1st byte */ 837 838 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */ 839 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 840 KICONV_JA_PUTU(uni); 841 } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */ 842 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)]; 843 KICONV_JA_PUTU(uni); 844 } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */ 845 KICONV_JA_NGET(ic2); 846 if (KICONV_JA_ISSJKANJI2(ic2)) { 847 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 848 if (ic2 >= 0x9f) { 849 ic1++; 850 } 851 ic2 = kiconv_ja_sjtojis2[ic2]; 852 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2); 853 if (uni == KICONV_JA_NODEST) { 854 index = ((ic1 - 0x21) * 94) 855 + (ic2 - 0x21); 856 uni = kiconv_ja_jisx0208_to_ucs2[index]; 857 } 858 if (uni == KICONV_JA_REPLACE) 859 rv++; 860 KICONV_JA_PUTU(uni); 861 } else { /* 2nd byte check failed */ 862 KICONV_JA_RETERROR(EILSEQ) 863 /* NOTREACHED */ 864 } 865 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */ 866 KICONV_JA_NGET(ic2); 867 if (KICONV_JA_ISSJKANJI2(ic2)) { 868 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 869 if (ic2 >= 0x9f) { 870 ic1++; 871 } 872 index = ((ic1 - 0x21) * 94) 873 + (kiconv_ja_sjtojis2[ic2] - 0x21); 874 uni = kiconv_ja_jisx0212_to_ucs2[index]; 875 if (uni == KICONV_JA_REPLACE) 876 rv++; 877 KICONV_JA_PUTU(uni); 878 } else { /* 2nd byte check failed */ 879 KICONV_JA_RETERROR(EILSEQ) 880 } 881 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */ 882 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */ 883 /* 884 * We need a special treatment for each codes. 885 * By adding some offset number for them, we 886 * can process them as the same way of that of 887 * extended IBM chars. 888 */ 889 KICONV_JA_NGET(ic2); 890 if (KICONV_JA_ISSJKANJI2(ic2)) { 891 ushort_t dest, upper, lower; 892 dest = (ic1 << 8) + ic2; 893 if ((0xed40 <= dest) && (dest <= 0xeffc)) { 894 KICONV_JA_REMAP_NEC(dest); 895 if (dest == 0xffff) { 896 KICONV_JA_RETERROR(EILSEQ) 897 } 898 } 899 /* 900 * XXX: 0xfa54 and 0xfa5b must be mapped 901 * to JIS0208 area. Therefore we 902 * have to do special treatment. 903 */ 904 if ((dest == 0xfa54) || (dest == 0xfa5b)) { 905 if (dest == 0xfa54) { 906 upper = 0x22; 907 lower = 0x4c; 908 } else { 909 upper = 0x22; 910 lower = 0x68; 911 } 912 KICONV_JA_CNV_JISMS_TO_U2(id, uni, 913 upper, lower); 914 if (uni == KICONV_JA_NODEST) { 915 index = (uint_t)((upper - 0x21) 916 * 94 + (lower - 0x21)); 917 uni = kiconv_ja_jisx0208_to_ucs2 918 [index]; 919 } 920 if (uni == KICONV_JA_REPLACE) 921 rv++; 922 KICONV_JA_PUTU(uni); 923 } else { 924 dest = dest - 0xfa40 - 925 (((dest>>8) - 0xfa) * 0x40); 926 dest = kiconv_ja_sjtoibmext[dest]; 927 if (dest == 0xffff) { 928 KICONV_JA_RETERROR(EILSEQ) 929 } 930 upper = (dest >> 8) & KICONV_JA_CMASK; 931 lower = dest & KICONV_JA_CMASK; 932 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni, 933 upper, lower); 934 if (uni == KICONV_JA_NODEST) { 935 index = (uint_t)((upper - 0x21) 936 * 94 + (lower - 0x21)); 937 uni = kiconv_ja_jisx0212_to_ucs2 938 [index]; 939 } 940 if (uni == KICONV_JA_REPLACE) 941 rv++; 942 KICONV_JA_PUTU(uni); 943 } 944 } else { /* 2nd byte check failed */ 945 KICONV_JA_RETERROR(EILSEQ) 946 } 947 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) { 948 /* 949 * Based on the draft convention of OSF-JVC CDEWG, 950 * characters in this area will be mapped to 951 * "CHIKAN-MOJI." (convertible character) 952 * We use U+FFFD in this case. 953 */ 954 KICONV_JA_NGET(ic2); 955 if (KICONV_JA_ISSJKANJI2(ic2)) { 956 uni = 0xfffd; 957 KICONV_JA_PUTU(uni); 958 } else { /* 2nd byte check failed */ 959 KICONV_JA_RETERROR(EILSEQ) 960 } 961 } else { /* 1st byte check failed */ 962 KICONV_JA_RETERROR(EILSEQ) 963 } 964 965 /* 966 * One character successfully converted so update 967 * values outside of this function's stack. 968 */ 969 *inbuf = (char *)ip; 970 *inbytesleft = ileft; 971 *outbuf = op; 972 *outbytesleft = oleft; 973 } 974 975 ret: 976 return (rv); 977 } 978 979 /* 980 * _kiconv_ja_lookuptbl() 981 * Return the index number if its index-ed number 982 * is the same as dest value. 983 */ 984 static ushort_t 985 _kiconv_ja_lookuptbl(ushort_t dest) 986 { 987 ushort_t tmp; 988 int i; 989 int sz = (sizeof (kiconv_ja_sjtoibmext) / 990 sizeof (kiconv_ja_sjtoibmext[0])); 991 992 for (i = 0; i < sz; i++) { 993 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f); 994 if (tmp == dest) 995 return ((i + 0xfa40 + ((i / 0xc0) * 0x40))); 996 } 997 return (0x3f); 998 } 999 1000 static size_t 1001 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1002 char **outbuf, size_t *outbytesleft, int *errno) 1003 { 1004 uchar_t ic; 1005 size_t rv = 0; 1006 uint_t ucs4; 1007 ushort_t euc16; 1008 ushort_t dest; 1009 1010 uchar_t *ip; 1011 size_t ileft; 1012 char *op; 1013 size_t oleft; 1014 size_t read_len; 1015 1016 size_t id = ((kiconv_state_t)kcd)->id; 1017 1018 if ((inbuf == NULL) || (*inbuf == NULL)) { 1019 return (0); 1020 } 1021 1022 ip = (uchar_t *)*inbuf; 1023 ileft = *inbytesleft; 1024 op = *outbuf; 1025 oleft = *outbytesleft; 1026 1027 KICONV_JA_CHECK_UTF8_BOM(ip, ileft); 1028 1029 while (ileft != 0) { 1030 KICONV_JA_GETU(&ucs4, 0); 1031 1032 if (ucs4 > 0xffff) { 1033 /* non-BMP */ 1034 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1035 rv++; 1036 goto next; 1037 } 1038 1039 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 1040 if (euc16 == KICONV_JA_NODEST) { 1041 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 1042 } 1043 if (euc16 == KICONV_JA_NODEST) { 1044 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1045 rv++; 1046 goto next; 1047 } 1048 1049 switch (euc16 & 0x8080) { 1050 case 0x0000: /* CS0 */ 1051 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) { 1052 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1053 rv++; 1054 } else { 1055 ic = (uchar_t)euc16; 1056 KICONV_JA_NPUT(ic); 1057 } 1058 break; 1059 case 0x8080: /* CS1 */ 1060 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1061 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]); 1062 /* 1063 * for even number row (Ku), add 0x80 to 1064 * look latter half of kiconv_ja_jistosj2[] array 1065 */ 1066 ic = (uchar_t)((euc16 & KICONV_JA_CMASK) 1067 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1068 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1069 break; 1070 case 0x0080: /* CS2 */ 1071 ic = (uchar_t)euc16; 1072 KICONV_JA_NPUT(ic); 1073 break; 1074 case 0x8000: /* CS3 */ 1075 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1076 if (euc16 == 0xa271) { 1077 /* NUMERO SIGN */ 1078 KICONV_JA_NPUT(0x87); 1079 KICONV_JA_NPUT(0x82); 1080 } else if (ic < 0x75) { /* check if IBM VDC */ 1081 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f); 1082 if (dest == 0xffff) { 1083 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1084 } else { 1085 /* avoid putting NUL ('\0') */ 1086 if (dest > 0xff) { 1087 KICONV_JA_NPUT( 1088 (dest >> 8) & 0xff); 1089 KICONV_JA_NPUT(dest & 0xff); 1090 } else { 1091 KICONV_JA_NPUT(dest & 0xff); 1092 } 1093 } 1094 } else { 1095 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]); 1096 /* 1097 * for even number row (Ku), add 0x80 to 1098 * look latter half of kiconv_ja_jistosj2[] 1099 */ 1100 ic = (ushort_t)((euc16 & KICONV_JA_CMASK) 1101 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1102 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1103 } 1104 break; 1105 } 1106 1107 next: 1108 /* 1109 * One character successfully converted so update 1110 * values outside of this function's stack. 1111 */ 1112 *inbuf = (char *)ip; 1113 *inbytesleft = ileft; 1114 *outbuf = op; 1115 *outbytesleft = oleft; 1116 } 1117 1118 ret: 1119 return (rv); 1120 } 1121 1122 static size_t 1123 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1124 size_t *outbytesleft, int flag, int *errno, uint8_t id) 1125 { 1126 uint_t uni; /* UTF-32 */ 1127 uint_t index; /* index for table lookup */ 1128 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */ 1129 size_t rv = 0; /* return value of this function */ 1130 1131 uchar_t *ip; 1132 size_t ileft; 1133 char *op; 1134 size_t oleft; 1135 1136 boolean_t do_not_ignore_null; 1137 1138 if ((inbuf == NULL) || (*inbuf == NULL)) { 1139 return (0); 1140 } 1141 1142 ip = (uchar_t *)inbuf; 1143 ileft = *inbytesleft; 1144 op = outbuf; 1145 oleft = *outbytesleft; 1146 1147 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 1148 1149 while (ileft != 0) { 1150 KICONV_JA_NGET(ic1); /* get 1st byte */ 1151 1152 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */ 1153 if (ic1 == '\0' && do_not_ignore_null) { 1154 return (0); 1155 } 1156 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 1157 KICONV_JA_PUTU(uni); 1158 } else if (KICONV_JA_ISSJKANA(ic1)) { 1159 /* JIS X 0201 Kana; 1 byte */ 1160 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)]; 1161 KICONV_JA_PUTU(uni); 1162 } else if (KICONV_JA_ISSJKANJI1(ic1)) { 1163 /* JIS X 0208 or UDC; 2 bytes */ 1164 if (flag & KICONV_REPLACE_INVALID) { 1165 KICONV_JA_NGET_REP_FR_MB(ic2); 1166 } else { 1167 KICONV_JA_NGET(ic2); 1168 } 1169 if (KICONV_JA_ISSJKANJI2(ic2)) { 1170 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 1171 if (ic2 >= 0x9f) { 1172 ic1++; 1173 } 1174 ic2 = kiconv_ja_sjtojis2[ic2]; 1175 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2); 1176 if (uni == KICONV_JA_NODEST) { 1177 index = ((ic1 - 0x21) * 94) 1178 + (ic2 - 0x21); 1179 uni = kiconv_ja_jisx0208_to_ucs2[index]; 1180 } 1181 if (uni == KICONV_JA_REPLACE) 1182 rv++; 1183 KICONV_JA_PUTU(uni); 1184 } else { /* 2nd byte check failed */ 1185 if (flag & KICONV_REPLACE_INVALID) { 1186 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1187 rv++; 1188 } else { 1189 KICONV_JA_RETERROR(EILSEQ) 1190 } 1191 /* NOTREACHED */ 1192 } 1193 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */ 1194 if (flag & KICONV_REPLACE_INVALID) { 1195 KICONV_JA_NGET_REP_FR_MB(ic2); 1196 } else { 1197 KICONV_JA_NGET(ic2); 1198 } 1199 if (KICONV_JA_ISSJKANJI2(ic2)) { 1200 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 1201 if (ic2 >= 0x9f) { 1202 ic1++; 1203 } 1204 index = ((ic1 - 0x21) * 94) 1205 + (kiconv_ja_sjtojis2[ic2] - 0x21); 1206 uni = kiconv_ja_jisx0212_to_ucs2[index]; 1207 if (uni == KICONV_JA_REPLACE) 1208 rv++; 1209 KICONV_JA_PUTU(uni); 1210 } else { /* 2nd byte check failed */ 1211 if (flag & KICONV_REPLACE_INVALID) { 1212 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1213 rv++; 1214 } else { 1215 KICONV_JA_RETERROR(EILSEQ) 1216 } 1217 } 1218 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */ 1219 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */ 1220 /* 1221 * We need a special treatment for each codes. 1222 * By adding some offset number for them, we 1223 * can process them as the same way of that of 1224 * extended IBM chars. 1225 */ 1226 if (flag & KICONV_REPLACE_INVALID) { 1227 KICONV_JA_NGET_REP_FR_MB(ic2); 1228 } else { 1229 KICONV_JA_NGET(ic2); 1230 } 1231 if (KICONV_JA_ISSJKANJI2(ic2)) { 1232 ushort_t dest, upper, lower; 1233 dest = (ic1 << 8) + ic2; 1234 if ((0xed40 <= dest) && (dest <= 0xeffc)) { 1235 KICONV_JA_REMAP_NEC(dest); 1236 if (dest == 0xffff) { 1237 if (flag & 1238 KICONV_REPLACE_INVALID) { 1239 KICONV_JA_PUTU( 1240 KICONV_JA_REPLACE); 1241 rv++; 1242 } else { 1243 KICONV_JA_RETERROR( 1244 EILSEQ) 1245 } 1246 } 1247 } 1248 /* 1249 * XXX: 0xfa54 and 0xfa5b must be mapped 1250 * to JIS0208 area. Therefore we 1251 * have to do special treatment. 1252 */ 1253 if ((dest == 0xfa54) || (dest == 0xfa5b)) { 1254 if (dest == 0xfa54) { 1255 upper = 0x22; 1256 lower = 0x4c; 1257 } else { 1258 upper = 0x22; 1259 lower = 0x68; 1260 } 1261 KICONV_JA_CNV_JISMS_TO_U2(id, uni, 1262 upper, lower); 1263 if (uni == KICONV_JA_NODEST) { 1264 index = (uint_t)((upper - 0x21) 1265 * 94 + (lower - 0x21)); 1266 uni = kiconv_ja_jisx0208_to_ucs2 1267 [index]; 1268 } 1269 if (uni == KICONV_JA_REPLACE) 1270 rv++; 1271 KICONV_JA_PUTU(uni); 1272 } else { 1273 dest = dest - 0xfa40 - 1274 (((dest>>8) - 0xfa) * 0x40); 1275 dest = kiconv_ja_sjtoibmext[dest]; 1276 if (dest == 0xffff) { 1277 if (flag & 1278 KICONV_REPLACE_INVALID) { 1279 KICONV_JA_PUTU( 1280 KICONV_JA_REPLACE); 1281 rv++; 1282 } else { 1283 KICONV_JA_RETERROR( 1284 EILSEQ) 1285 } 1286 } 1287 upper = (dest >> 8) & KICONV_JA_CMASK; 1288 lower = dest & KICONV_JA_CMASK; 1289 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni, 1290 upper, lower); 1291 if (uni == KICONV_JA_NODEST) { 1292 index = (uint_t)((upper - 0x21) 1293 * 94 + (lower - 0x21)); 1294 uni = kiconv_ja_jisx0212_to_ucs2 1295 [index]; 1296 } 1297 if (uni == KICONV_JA_REPLACE) 1298 rv++; 1299 KICONV_JA_PUTU(uni); 1300 } 1301 } else { /* 2nd byte check failed */ 1302 if (flag & KICONV_REPLACE_INVALID) { 1303 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1304 rv++; 1305 } else { 1306 KICONV_JA_RETERROR(EILSEQ) 1307 } 1308 } 1309 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) { 1310 /* 1311 * Based on the draft convention of OSF-JVC CDEWG, 1312 * characters in this area will be mapped to 1313 * "CHIKAN-MOJI." (convertible character) 1314 * We use U+FFFD in this case. 1315 */ 1316 if (flag & KICONV_REPLACE_INVALID) { 1317 KICONV_JA_NGET_REP_FR_MB(ic2); 1318 } else { 1319 KICONV_JA_NGET(ic2); 1320 } 1321 if (KICONV_JA_ISSJKANJI2(ic2)) { 1322 uni = 0xfffd; 1323 KICONV_JA_PUTU(uni); 1324 } else { /* 2nd byte check failed */ 1325 if (flag & KICONV_REPLACE_INVALID) { 1326 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1327 rv++; 1328 } else { 1329 KICONV_JA_RETERROR(EILSEQ) 1330 } 1331 } 1332 } else { /* 1st byte check failed */ 1333 if (flag & KICONV_REPLACE_INVALID) { 1334 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1335 rv++; 1336 } else { 1337 KICONV_JA_RETERROR(EILSEQ) 1338 } 1339 } 1340 1341 next: 1342 /* 1343 * One character successfully converted so update 1344 * values outside of this function's stack. 1345 */ 1346 *inbytesleft = ileft; 1347 *outbytesleft = oleft; 1348 } 1349 1350 ret: 1351 return (rv); 1352 } 1353 1354 static size_t 1355 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1356 size_t *outbytesleft, int flag, int *errno, uint8_t id) 1357 { 1358 uchar_t ic; 1359 size_t rv = 0; 1360 uint_t ucs4; 1361 ushort_t euc16; 1362 ushort_t dest; 1363 1364 uchar_t *ip; 1365 size_t ileft; 1366 char *op; 1367 size_t oleft; 1368 size_t read_len; 1369 1370 boolean_t do_not_ignore_null; 1371 1372 if ((inbuf == NULL) || (*inbuf == NULL)) { 1373 return (0); 1374 } 1375 1376 ip = (uchar_t *)inbuf; 1377 ileft = *inbytesleft; 1378 op = outbuf; 1379 oleft = *outbytesleft; 1380 1381 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft); 1382 1383 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 1384 1385 while (ileft != 0) { 1386 KICONV_JA_GETU(&ucs4, flag); 1387 1388 if (ucs4 == 0x0 && do_not_ignore_null) { 1389 return (0); 1390 } 1391 1392 if (ucs4 > 0xffff) { 1393 /* non-BMP */ 1394 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1395 rv++; 1396 goto next; 1397 } 1398 1399 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 1400 if (euc16 == KICONV_JA_NODEST) { 1401 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 1402 } 1403 if (euc16 == KICONV_JA_NODEST) { 1404 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1405 rv++; 1406 goto next; 1407 } 1408 1409 switch (euc16 & 0x8080) { 1410 case 0x0000: /* CS0 */ 1411 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) { 1412 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1413 rv++; 1414 } else { 1415 ic = (uchar_t)euc16; 1416 KICONV_JA_NPUT(ic); 1417 } 1418 break; 1419 case 0x8080: /* CS1 */ 1420 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1421 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]); 1422 /* 1423 * for even number row (Ku), add 0x80 to 1424 * look latter half of kiconv_ja_jistosj2[] array 1425 */ 1426 ic = (uchar_t)((euc16 & KICONV_JA_CMASK) 1427 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1428 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1429 break; 1430 case 0x0080: /* CS2 */ 1431 ic = (uchar_t)euc16; 1432 KICONV_JA_NPUT(ic); 1433 break; 1434 case 0x8000: /* CS3 */ 1435 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1436 if (euc16 == 0xa271) { 1437 /* NUMERO SIGN */ 1438 KICONV_JA_NPUT(0x87); 1439 KICONV_JA_NPUT(0x82); 1440 } else if (ic < 0x75) { /* check if IBM VDC */ 1441 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f); 1442 if (dest == 0xffff) { 1443 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1444 } else { 1445 /* avoid putting NUL ('\0') */ 1446 if (dest > 0xff) { 1447 KICONV_JA_NPUT( 1448 (dest >> 8) & 0xff); 1449 KICONV_JA_NPUT(dest & 0xff); 1450 } else { 1451 KICONV_JA_NPUT(dest & 0xff); 1452 } 1453 } 1454 } else { 1455 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]); 1456 /* 1457 * for even number row (Ku), add 0x80 to 1458 * look latter half of kiconv_ja_jistosj2[] 1459 */ 1460 ic = (ushort_t)((euc16 & KICONV_JA_CMASK) 1461 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1462 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1463 } 1464 break; 1465 } 1466 1467 next: 1468 /* 1469 * One character successfully converted so update 1470 * values outside of this function's stack. 1471 */ 1472 *inbytesleft = ileft; 1473 *outbytesleft = oleft; 1474 } 1475 1476 ret: 1477 return (rv); 1478 } 1479 1480 static size_t 1481 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1482 char **outbuf, size_t *outbytesleft, int *errno) 1483 { 1484 if (! kcd || kcd == (void *)-1) { 1485 *errno = EBADF; 1486 return ((size_t)-1); 1487 } 1488 1489 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft, 1490 outbuf, outbytesleft, errno)); 1491 } 1492 1493 static size_t 1494 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1495 char **outbuf, size_t *outbytesleft, int *errno) 1496 { 1497 if (! kcd || kcd == (void *)-1) { 1498 *errno = EBADF; 1499 return ((size_t)-1); 1500 } 1501 1502 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft, 1503 outbuf, outbytesleft, errno)); 1504 } 1505 1506 static size_t 1507 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1508 size_t *outbytesleft, int flag, int *errno) 1509 { 1510 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf, 1511 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS)); 1512 } 1513 1514 static size_t 1515 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1516 size_t *outbytesleft, int flag, int *errno) 1517 { 1518 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf, 1519 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS)); 1520 } 1521 1522 static size_t 1523 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf, 1524 size_t *outbytesleft, int flag, int *errno) 1525 { 1526 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf, 1527 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932)); 1528 } 1529 1530 static size_t 1531 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf, 1532 size_t *outbytesleft, int flag, int *errno) 1533 { 1534 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf, 1535 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932)); 1536 } 1537 1538 static kiconv_ops_t kiconv_ja_ops_tbl[] = { 1539 { 1540 "eucjp", "utf-8", open_eucjp, 1541 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp 1542 }, 1543 { 1544 "utf-8", "eucjp", open_eucjp, 1545 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp 1546 }, 1547 { 1548 "eucjpms", "utf-8", open_eucjpms, 1549 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms 1550 }, 1551 { 1552 "utf-8", "eucjpms", open_eucjpms, 1553 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms 1554 }, 1555 { 1556 "sjis", "utf-8", open_sjis, 1557 kiconv_to_sjis, close_ja, kiconvstr_to_sjis 1558 }, 1559 { 1560 "utf-8", "sjis", open_sjis, 1561 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis 1562 }, 1563 { 1564 "cp932", "utf-8", open_cp932, 1565 kiconv_to_sjis, close_ja, kiconvstr_to_cp932 1566 }, 1567 { 1568 "utf-8", "cp932", open_cp932, 1569 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932 1570 } 1571 }; 1572 1573 static char *kiconv_ja_aliases[] = {"932", "shiftjis", "pck"}; 1574 static char *kiconv_ja_canonicals[] = {"cp932", "sjis", "sjis"}; 1575 1576 #define KICONV_JA_MAX_JA_OPS \ 1577 (sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ops_t)) 1578 #define KICONV_JA_MAX_JA_ALIAS \ 1579 (sizeof (kiconv_ja_aliases) / sizeof (char *)) 1580 1581 static kiconv_module_info_t kiconv_ja_info = { 1582 "kiconv_ja", /* module name */ 1583 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */ 1584 kiconv_ja_ops_tbl, /* kiconv_ja ops table */ 1585 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */ 1586 kiconv_ja_aliases, /* kiconv_ja aliases */ 1587 kiconv_ja_canonicals, /* kiconv_ja canonicals */ 1588 0 1589 }; 1590 1591 static struct modlkiconv modlkiconv_ja = { 1592 &mod_kiconvops, 1593 "kiconv module for Japanese", 1594 &kiconv_ja_info 1595 }; 1596 1597 static struct modlinkage modlinkage = { 1598 MODREV_1, 1599 (void *)&modlkiconv_ja, 1600 NULL 1601 }; 1602 1603 int 1604 _init(void) 1605 { 1606 int err; 1607 1608 err = mod_install(&modlinkage); 1609 if (err) 1610 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module"); 1611 1612 return (err); 1613 } 1614 1615 int 1616 _info(struct modinfo *modinfop) 1617 { 1618 return (mod_info(&modlinkage, modinfop)); 1619 } 1620 1621 int 1622 _fini(void) 1623 { 1624 int err; 1625 1626 /* 1627 * If this module is being used, then, we cannot remove the module. 1628 * The following checking will catch pretty much all usual cases. 1629 * 1630 * Any remaining will be catached by the kiconv_unregister_module() 1631 * during mod_remove() at below. 1632 */ 1633 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA)) 1634 return (EBUSY); 1635 1636 err = mod_remove(&modlinkage); 1637 if (err) 1638 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module"); 1639 1640 return (err); 1641 } 1642