1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/sysmacros.h> 30 #include <sys/systm.h> 31 #include <sys/debug.h> 32 #include <sys/kmem.h> 33 #include <sys/sunddi.h> 34 #include <sys/byteorder.h> 35 #include <sys/errno.h> 36 #include <sys/euc.h> 37 #include <sys/modctl.h> 38 #include <sys/kiconv.h> 39 40 #include <sys/kiconv_ja.h> 41 #include <sys/kiconv_ja_jis_to_unicode.h> 42 #include <sys/kiconv_ja_unicode_to_jis.h> 43 44 /* 45 * The following vector shows remaining bytes in a UTF-8 character. 46 * Index will be the first byte of the character. This is defined in 47 * u8_textprep.c. 48 */ 49 extern const int8_t u8_number_of_bytes[]; 50 51 /* 52 * The following is a vector of bit-masks to get used bits in 53 * the first byte of a UTF-8 character. Index is remaining bytes at above of 54 * the character. This is defined in uconv.c. 55 */ 56 extern const uchar_t u8_masks_tbl[]; 57 58 /* 59 * The following two vectors are to provide valid minimum and 60 * maximum values for the 2'nd byte of a multibyte UTF-8 character for 61 * better illegal sequence checking. The index value must be the value of 62 * the first byte of the UTF-8 character. These are defined in u8_textprep.c. 63 */ 64 extern const uint8_t u8_valid_min_2nd_byte[]; 65 extern const uint8_t u8_valid_max_2nd_byte[]; 66 67 static kiconv_ja_euc16_t 68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2) 69 { 70 const kiconv_ja_euc16_t *p; 71 72 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL) 73 return (p[ucs2 & 0xff]); 74 75 return (KICONV_JA_NODEST); 76 } 77 78 static size_t 79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno) 80 { 81 uint_t l; /* to be copied to *p on successful return */ 82 uchar_t ic; /* current byte */ 83 uchar_t ic1; /* 1st byte */ 84 uchar_t *ip = *pip; /* next byte to read */ 85 size_t ileft = *pileft; /* number of bytes available */ 86 size_t rv = 0; /* return value of this function */ 87 int remaining_bytes; 88 int u8_size; 89 90 KICONV_JA_NGET(ic1); /* read 1st byte */ 91 92 if (ic1 < 0x80) { 93 /* successfully converted */ 94 *p = (uint_t)ic1; 95 goto ret; 96 } 97 98 u8_size = u8_number_of_bytes[ic1]; 99 if (u8_size == U8_ILLEGAL_CHAR) { 100 KICONV_JA_RETERROR(EILSEQ) 101 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) { 102 KICONV_JA_RETERROR(ERANGE) 103 } 104 105 remaining_bytes = u8_size - 1; 106 if (remaining_bytes != 0) { 107 l = ic1 & u8_masks_tbl[remaining_bytes]; 108 109 for (; remaining_bytes > 0; remaining_bytes--) { 110 KICONV_JA_NGET(ic); 111 if (ic1 != 0U) { 112 if ((ic < u8_valid_min_2nd_byte[ic1]) || 113 (ic > u8_valid_max_2nd_byte[ic1])) { 114 KICONV_JA_RETERROR(EILSEQ) 115 } 116 ic1 = 0U; /* 2nd byte check done */ 117 } else { 118 if ((ic < 0x80) || (ic > 0xbf)) { 119 KICONV_JA_RETERROR(EILSEQ) 120 } 121 } 122 l = (l << 6) | (ic & 0x3f); 123 } 124 125 /* successfully converted */ 126 *p = l; 127 } else { 128 KICONV_JA_RETERROR(EILSEQ) 129 } 130 131 ret: 132 if (rv == 0) { 133 /* 134 * Update rv, *pip, and *pileft on successfule return. 135 */ 136 rv = *pileft - ileft; 137 *pip = ip; 138 *pileft = ileft; 139 } 140 141 return (rv); 142 } 143 144 static size_t 145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum) 146 { 147 uint_t l; /* to be copied to *p on successful return */ 148 uchar_t ic; /* current byte */ 149 uchar_t ic1; /* 1st byte */ 150 uchar_t *ip = *pip; /* next byte to read */ 151 size_t ileft = *pileft; /* number of bytes available */ 152 size_t rv = 0; /* return value of this function */ 153 int remaining_bytes; 154 int u8_size; 155 156 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */ 157 158 if (ic1 < 0x80) { 159 /* successfully converted */ 160 l = (uint_t)ic1; 161 goto ret; 162 } 163 164 u8_size = u8_number_of_bytes[ic1]; 165 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) { 166 l = KICONV_JA_DEF_SINGLE; 167 (*repnum)++; 168 goto ret; 169 } 170 171 remaining_bytes = u8_size - 1; 172 173 if (remaining_bytes != 0) { 174 l = ic1 & u8_masks_tbl[remaining_bytes]; 175 176 for (; remaining_bytes > 0; remaining_bytes--) { 177 KICONV_JA_NGET_REP_TO_MB(ic); 178 if (ic1 != 0U) { 179 if ((ic < u8_valid_min_2nd_byte[ic1]) || 180 (ic > u8_valid_max_2nd_byte[ic1])) { 181 l = KICONV_JA_DEF_SINGLE; 182 (*repnum)++; 183 ileft -= (remaining_bytes - 1); 184 ip += (remaining_bytes - 1); 185 break; 186 } 187 ic1 = 0U; /* 2nd byte check done */ 188 } else { 189 if ((ic < 0x80) || (ic > 0xbf)) { 190 l = KICONV_JA_DEF_SINGLE; 191 (*repnum)++; 192 ileft -= (remaining_bytes - 1); 193 ip += (remaining_bytes - 1); 194 break; 195 } 196 } 197 l = (l << 6) | (ic & 0x3f); 198 } 199 } else { 200 l = KICONV_JA_DEF_SINGLE; 201 (*repnum)++; 202 } 203 204 ret: 205 /* successfully converted */ 206 *p = l; 207 rv = *pileft - ileft; 208 209 *pip = ip; 210 *pileft = ileft; 211 212 return (rv); 213 } 214 215 static size_t /* return #bytes read, or -1 */ 216 read_unicode( 217 uint_t *p, /* point variable to store UTF-32 */ 218 uchar_t **pip, /* point pointer to input buf */ 219 size_t *pileft, /* point #bytes left in input buf */ 220 int *errno, /* point variable to errno */ 221 int flag, /* kiconvstr flag */ 222 size_t *rv) /* point return valuse */ 223 { 224 if (flag & KICONV_REPLACE_INVALID) 225 return (utf8_ucs_replace(p, pip, pileft, rv)); 226 else 227 return (utf8_ucs(p, pip, pileft, errno)); 228 } 229 230 static size_t 231 write_unicode( 232 uint_t u32, /* UTF-32 to write */ 233 char **pop, /* point pointer to output buf */ 234 size_t *poleft, /* point #bytes left in output buf */ 235 int *errno) /* point variable to errno */ 236 { 237 char *op = *pop; 238 size_t oleft = *poleft; 239 size_t rv = 0; /* return value */ 240 241 if (u32 <= 0x7f) { 242 KICONV_JA_NPUT((uchar_t)(u32)); 243 rv = 1; 244 } else if (u32 <= 0x7ff) { 245 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0)); 246 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 247 rv = 2; 248 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) { 249 KICONV_JA_RETERROR(EILSEQ) 250 } else if (u32 <= 0xffff) { 251 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0)); 252 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80)); 253 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 254 rv = 3; 255 } else if (u32 <= 0x10ffff) { 256 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0)); 257 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80)); 258 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80)); 259 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80)); 260 rv = 4; 261 } else { 262 KICONV_JA_RETERROR(EILSEQ) 263 } 264 265 ret: 266 if (rv != (size_t)-1) { 267 /* update *pop and *poleft only on successful return */ 268 *pop = op; 269 *poleft = oleft; 270 } 271 272 return (rv); 273 } 274 275 static void * 276 _kiconv_ja_open_unicode(uint8_t id) 277 { 278 kiconv_state_t kcd; 279 280 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), 281 KM_SLEEP); 282 kcd->id = id; 283 kcd->bom_processed = 0; 284 return ((void *)kcd); 285 } 286 287 static void * 288 open_eucjp(void) 289 { 290 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP)); 291 } 292 293 static void * 294 open_eucjpms(void) 295 { 296 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS)); 297 } 298 299 static void * 300 open_sjis(void) 301 { 302 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS)); 303 } 304 305 static void * 306 open_cp932(void) 307 { 308 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932)); 309 } 310 311 int 312 close_ja(void *kcd) 313 { 314 if (! kcd || kcd == (void *)-1) 315 return (EBADF); 316 317 kmem_free(kcd, sizeof (kiconv_state_data_t)); 318 319 return (0); 320 } 321 322 static size_t 323 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 324 char **outbuf, size_t *outbytesleft, int *errno) 325 { 326 uint_t u32; /* UTF-32 */ 327 uint_t index; /* index for table lookup */ 328 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */ 329 size_t rv = 0; /* return value of this function */ 330 331 uchar_t *ip; 332 size_t ileft; 333 char *op; 334 size_t oleft; 335 size_t id = ((kiconv_state_t)kcd)->id; 336 337 if ((inbuf == NULL) || (*inbuf == NULL)) { 338 return (0); 339 } 340 341 ip = (uchar_t *)*inbuf; 342 ileft = *inbytesleft; 343 op = *outbuf; 344 oleft = *outbytesleft; 345 346 while (ileft != 0) { 347 KICONV_JA_NGET(ic1); /* get 1st byte */ 348 349 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */ 350 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 351 KICONV_JA_PUTU(u32); 352 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */ 353 KICONV_JA_NGET(ic2); 354 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */ 355 ic1 &= KICONV_JA_CMASK; 356 ic2 &= KICONV_JA_CMASK; 357 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2); 358 if (u32 == KICONV_JA_NODEST) { 359 index = (ic1 - 0x21) * 94 + ic2 - 0x21; 360 u32 = kiconv_ja_jisx0208_to_ucs2[index]; 361 } 362 if (u32 == KICONV_JA_REPLACE) 363 rv++; 364 KICONV_JA_PUTU(u32); 365 } else { /* 2nd byte check failed */ 366 KICONV_JA_RETERROR(EILSEQ) 367 } 368 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */ 369 KICONV_JA_NGET(ic2); 370 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */ 371 index = (ic2 - 0xa1); 372 u32 = kiconv_ja_jisx0201kana_to_ucs2[index]; 373 KICONV_JA_PUTU(u32); 374 } else { /* 2nd byte check failed */ 375 KICONV_JA_RETERROR(EILSEQ) 376 } 377 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */ 378 KICONV_JA_NGET(ic2); 379 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */ 380 KICONV_JA_NGET(ic3); 381 if (KICONV_JA_ISCS3(ic3)) { 382 /* 3rd byte check passed */ 383 ic2 &= KICONV_JA_CMASK; 384 ic3 &= KICONV_JA_CMASK; 385 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32, 386 ic2, ic3); 387 if (u32 == KICONV_JA_NODEST) { 388 index = ((ic2 - 0x21) * 94 + 389 (ic3 - 0x21)); 390 u32 = kiconv_ja_jisx0212_to_ucs2 391 [index]; 392 } 393 if (u32 == KICONV_JA_REPLACE) 394 rv++; 395 KICONV_JA_PUTU(u32); 396 } else { /* 3rd byte check failed */ 397 KICONV_JA_RETERROR(EILSEQ) 398 } 399 } else { /* 2nd byte check failed */ 400 KICONV_JA_RETERROR(EILSEQ) 401 } 402 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) { 403 /* C1 control; 1 byte */ 404 u32 = ic1; 405 KICONV_JA_PUTU(u32); 406 } else { /* 1st byte check failed */ 407 KICONV_JA_RETERROR(EILSEQ) 408 } 409 410 /* 411 * One character successfully converted so update 412 * values outside of this function's stack. 413 */ 414 *inbuf = (char *)ip; 415 *inbytesleft = ileft; 416 *outbuf = op; 417 *outbytesleft = oleft; 418 } 419 420 ret: 421 return (rv); 422 } 423 424 static size_t 425 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 426 char **outbuf, size_t *outbytesleft, int *errno) 427 { 428 uchar_t ic; 429 size_t rv = 0; 430 uint_t ucs4; 431 ushort_t euc16; 432 433 uchar_t *ip; 434 size_t ileft; 435 char *op; 436 size_t oleft; 437 size_t read_len; 438 439 size_t id = ((kiconv_state_t)kcd)->id; 440 441 if ((inbuf == NULL) || (*inbuf == NULL)) { 442 return (0); 443 } 444 445 ip = (uchar_t *)*inbuf; 446 ileft = *inbytesleft; 447 op = *outbuf; 448 oleft = *outbytesleft; 449 450 KICONV_JA_CHECK_UTF8_BOM(ip, ileft); 451 452 while (ileft != 0) { 453 KICONV_JA_GETU(&ucs4, 0); 454 455 if (ucs4 > 0xffff) { 456 /* non-BMP */ 457 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 458 rv++; 459 goto next; 460 } 461 462 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 463 if (euc16 == KICONV_JA_NODEST) { 464 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 465 } 466 if (euc16 == KICONV_JA_NODEST) { 467 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 468 rv++; 469 goto next; 470 } 471 472 switch (euc16 & 0x8080) { 473 case 0x0000: /* CS0 */ 474 ic = (uchar_t)euc16; 475 KICONV_JA_NPUT(ic); 476 break; 477 case 0x8080: /* CS1 */ 478 ic = (uchar_t)((euc16 >> 8) & 0xff); 479 KICONV_JA_NPUT(ic); 480 ic = (uchar_t)(euc16 & 0xff); 481 KICONV_JA_NPUT(ic); 482 break; 483 case 0x0080: /* CS2 */ 484 KICONV_JA_NPUT(SS2); 485 ic = (uchar_t)euc16; 486 KICONV_JA_NPUT(ic); 487 break; 488 case 0x8000: /* CS3 */ 489 KICONV_JA_NPUT(SS3); 490 ic = (uchar_t)((euc16 >> 8) & 0xff); 491 KICONV_JA_NPUT(ic); 492 ic = (uchar_t)(euc16 & KICONV_JA_CMASK); 493 KICONV_JA_NPUT(ic | KICONV_JA_CMSB); 494 break; 495 } 496 next: 497 /* 498 * One character successfully converted so update 499 * values outside of this function's stack. 500 */ 501 *inbuf = (char *)ip; 502 *inbytesleft = ileft; 503 *outbuf = op; 504 *outbytesleft = oleft; 505 } 506 507 ret: 508 return (rv); 509 } 510 511 static size_t 512 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 513 size_t *outbytesleft, int flag, int *errno, uint8_t id) 514 { 515 uint_t u32; /* UTF-32 */ 516 uint_t index; /* index for table lookup */ 517 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */ 518 size_t rv = 0; /* return value of this function */ 519 520 uchar_t *ip; 521 size_t ileft; 522 char *op; 523 size_t oleft; 524 525 boolean_t do_not_ignore_null; 526 527 if ((inbuf == NULL) || (*inbuf == '\0')) { 528 return (0); 529 } 530 531 ip = (uchar_t *)inbuf; 532 ileft = *inbytesleft; 533 op = outbuf; 534 oleft = *outbytesleft; 535 536 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 537 538 while (ileft != 0) { 539 KICONV_JA_NGET(ic1); /* get 1st byte */ 540 541 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */ 542 if (ic1 == '\0' && do_not_ignore_null) { 543 return (0); 544 } 545 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 546 KICONV_JA_PUTU(u32); 547 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */ 548 if (flag & KICONV_REPLACE_INVALID) { 549 KICONV_JA_NGET_REP_FR_MB(ic2); 550 } else { 551 KICONV_JA_NGET(ic2); 552 } 553 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */ 554 ic1 &= KICONV_JA_CMASK; 555 ic2 &= KICONV_JA_CMASK; 556 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2); 557 if (u32 == KICONV_JA_NODEST) { 558 index = (ic1 - 0x21) * 94 + ic2 - 0x21; 559 u32 = kiconv_ja_jisx0208_to_ucs2[index]; 560 } 561 if (u32 == KICONV_JA_REPLACE) 562 rv++; 563 KICONV_JA_PUTU(u32); 564 } else { /* 2nd byte check failed */ 565 if (flag & KICONV_REPLACE_INVALID) { 566 KICONV_JA_PUTU(KICONV_JA_REPLACE); 567 rv++; 568 } else { 569 KICONV_JA_RETERROR(EILSEQ) 570 } 571 } 572 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */ 573 if (flag & KICONV_REPLACE_INVALID) { 574 KICONV_JA_NGET_REP_FR_MB(ic2); 575 } else { 576 KICONV_JA_NGET(ic2); 577 } 578 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */ 579 index = (ic2 - 0xa1); 580 u32 = kiconv_ja_jisx0201kana_to_ucs2[index]; 581 KICONV_JA_PUTU(u32); 582 } else { /* 2nd byte check failed */ 583 if (flag & KICONV_REPLACE_INVALID) { 584 KICONV_JA_PUTU(KICONV_JA_REPLACE); 585 rv++; 586 } else { 587 KICONV_JA_RETERROR(EILSEQ) 588 } 589 } 590 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */ 591 if (flag & KICONV_REPLACE_INVALID) { 592 KICONV_JA_NGET_REP_FR_MB(ic2); 593 } else { 594 KICONV_JA_NGET(ic2); 595 } 596 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */ 597 if (flag & KICONV_REPLACE_INVALID) { 598 KICONV_JA_NGET_REP_FR_MB(ic3); 599 } else { 600 KICONV_JA_NGET(ic3); 601 } 602 if (KICONV_JA_ISCS3(ic3)) { 603 /* 3rd byte check passed */ 604 ic2 &= KICONV_JA_CMASK; 605 ic3 &= KICONV_JA_CMASK; 606 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32, 607 ic2, ic3); 608 if (u32 == KICONV_JA_NODEST) { 609 index = ((ic2 - 0x21) * 94 + 610 (ic3 - 0x21)); 611 u32 = kiconv_ja_jisx0212_to_ucs2 612 [index]; 613 } 614 if (u32 == KICONV_JA_REPLACE) 615 rv++; 616 KICONV_JA_PUTU(u32); 617 } else { /* 3rd byte check failed */ 618 if (flag & KICONV_REPLACE_INVALID) { 619 KICONV_JA_PUTU( 620 KICONV_JA_REPLACE); 621 rv++; 622 } else { 623 KICONV_JA_RETERROR(EILSEQ) 624 } 625 } 626 } else { /* 2nd byte check failed */ 627 if (flag & KICONV_REPLACE_INVALID) { 628 KICONV_JA_PUTU(KICONV_JA_REPLACE); 629 rv++; 630 } else { 631 KICONV_JA_RETERROR(EILSEQ) 632 } 633 } 634 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) { 635 /* C1 control; 1 byte */ 636 u32 = ic1; 637 KICONV_JA_PUTU(u32); 638 } else { /* 1st byte check failed */ 639 if (flag & KICONV_REPLACE_INVALID) { 640 KICONV_JA_PUTU(KICONV_JA_REPLACE); 641 rv++; 642 } else { 643 KICONV_JA_RETERROR(EILSEQ) 644 } 645 } 646 647 next: 648 /* 649 * One character successfully converted so update 650 * values outside of this function's stack. 651 */ 652 *inbytesleft = ileft; 653 *outbytesleft = oleft; 654 } 655 656 ret: 657 return (rv); 658 } 659 660 static size_t 661 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 662 size_t *outbytesleft, int flag, int *errno, uint8_t id) 663 { 664 uchar_t ic; 665 size_t rv = 0; 666 uint_t ucs4; 667 ushort_t euc16; 668 669 uchar_t *ip; 670 size_t ileft; 671 char *op; 672 size_t oleft; 673 size_t read_len; 674 675 boolean_t do_not_ignore_null; 676 677 if ((inbuf == NULL) || (*inbuf == '\0')) { 678 return (0); 679 } 680 681 ip = (uchar_t *)inbuf; 682 ileft = *inbytesleft; 683 op = outbuf; 684 oleft = *outbytesleft; 685 686 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft); 687 688 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 689 690 while (ileft != 0) { 691 KICONV_JA_GETU(&ucs4, flag); 692 693 if (ucs4 == 0x0 && do_not_ignore_null) { 694 return (0); 695 } 696 697 if (ucs4 > 0xffff) { 698 /* non-BMP */ 699 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 700 rv++; 701 goto next; 702 } 703 704 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 705 if (euc16 == KICONV_JA_NODEST) { 706 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 707 } 708 if (euc16 == KICONV_JA_NODEST) { 709 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 710 rv++; 711 goto next; 712 } 713 714 switch (euc16 & 0x8080) { 715 case 0x0000: /* CS0 */ 716 ic = (uchar_t)euc16; 717 KICONV_JA_NPUT(ic); 718 break; 719 case 0x8080: /* CS1 */ 720 ic = (uchar_t)((euc16 >> 8) & 0xff); 721 KICONV_JA_NPUT(ic); 722 ic = (uchar_t)(euc16 & 0xff); 723 KICONV_JA_NPUT(ic); 724 break; 725 case 0x0080: /* CS2 */ 726 KICONV_JA_NPUT(SS2); 727 ic = (uchar_t)euc16; 728 KICONV_JA_NPUT(ic); 729 break; 730 case 0x8000: /* CS3 */ 731 KICONV_JA_NPUT(SS3); 732 ic = (uchar_t)((euc16 >> 8) & 0xff); 733 KICONV_JA_NPUT(ic); 734 ic = (uchar_t)(euc16 & KICONV_JA_CMASK); 735 KICONV_JA_NPUT(ic | KICONV_JA_CMSB); 736 break; 737 } 738 next: 739 /* 740 * One character successfully converted so update 741 * values outside of this function's stack. 742 */ 743 *inbytesleft = ileft; 744 *outbytesleft = oleft; 745 } 746 747 ret: 748 return (rv); 749 } 750 751 static size_t 752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 753 char **outbuf, size_t *outbytesleft, int *errno) 754 { 755 if (! kcd || kcd == (void *)-1) { 756 *errno = EBADF; 757 return ((size_t)-1); 758 } 759 760 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft, 761 outbuf, outbytesleft, errno)); 762 } 763 764 static size_t 765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft, 766 char **outbuf, size_t *outbytesleft, int *errno) 767 { 768 if (! kcd || kcd == (void *)-1) { 769 *errno = EBADF; 770 return ((size_t)-1); 771 } 772 773 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft, 774 outbuf, outbytesleft, errno)); 775 } 776 777 static size_t 778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 779 size_t *outbytesleft, int flag, int *errno) 780 { 781 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf, 782 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP)); 783 } 784 785 static size_t 786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf, 787 size_t *outbytesleft, int flag, int *errno) 788 { 789 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf, 790 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP)); 791 } 792 793 static size_t 794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf, 795 size_t *outbytesleft, int flag, int *errno) 796 { 797 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf, 798 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS)); 799 } 800 801 static size_t 802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf, 803 size_t *outbytesleft, int flag, int *errno) 804 { 805 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf, 806 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS)); 807 } 808 809 static size_t 810 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 811 char **outbuf, size_t *outbytesleft, int *errno) 812 { 813 uint_t uni; /* UTF-32 */ 814 uint_t index; /* index for table lookup */ 815 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */ 816 size_t rv = 0; /* return value of this function */ 817 818 uchar_t *ip; 819 size_t ileft; 820 char *op; 821 size_t oleft; 822 size_t id = ((kiconv_state_t)kcd)->id; 823 824 if ((inbuf == NULL) || (*inbuf == NULL)) { 825 return (0); 826 } 827 828 ip = (uchar_t *)*inbuf; 829 ileft = *inbytesleft; 830 op = *outbuf; 831 oleft = *outbytesleft; 832 833 while (ileft != 0) { 834 KICONV_JA_NGET(ic1); /* get 1st byte */ 835 836 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */ 837 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 838 KICONV_JA_PUTU(uni); 839 } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */ 840 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)]; 841 KICONV_JA_PUTU(uni); 842 } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */ 843 KICONV_JA_NGET(ic2); 844 if (KICONV_JA_ISSJKANJI2(ic2)) { 845 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 846 if (ic2 >= 0x9f) { 847 ic1++; 848 } 849 ic2 = kiconv_ja_sjtojis2[ic2]; 850 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2); 851 if (uni == KICONV_JA_NODEST) { 852 index = ((ic1 - 0x21) * 94) 853 + (ic2 - 0x21); 854 uni = kiconv_ja_jisx0208_to_ucs2[index]; 855 } 856 if (uni == KICONV_JA_REPLACE) 857 rv++; 858 KICONV_JA_PUTU(uni); 859 } else { /* 2nd byte check failed */ 860 KICONV_JA_RETERROR(EILSEQ) 861 /* NOTREACHED */ 862 } 863 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */ 864 KICONV_JA_NGET(ic2); 865 if (KICONV_JA_ISSJKANJI2(ic2)) { 866 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 867 if (ic2 >= 0x9f) { 868 ic1++; 869 } 870 index = ((ic1 - 0x21) * 94) 871 + (kiconv_ja_sjtojis2[ic2] - 0x21); 872 uni = kiconv_ja_jisx0212_to_ucs2[index]; 873 if (uni == KICONV_JA_REPLACE) 874 rv++; 875 KICONV_JA_PUTU(uni); 876 } else { /* 2nd byte check failed */ 877 KICONV_JA_RETERROR(EILSEQ) 878 } 879 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */ 880 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */ 881 /* 882 * We need a special treatment for each codes. 883 * By adding some offset number for them, we 884 * can process them as the same way of that of 885 * extended IBM chars. 886 */ 887 KICONV_JA_NGET(ic2); 888 if (KICONV_JA_ISSJKANJI2(ic2)) { 889 ushort_t dest, upper, lower; 890 dest = (ic1 << 8) + ic2; 891 if ((0xed40 <= dest) && (dest <= 0xeffc)) { 892 KICONV_JA_REMAP_NEC(dest); 893 if (dest == 0xffff) { 894 KICONV_JA_RETERROR(EILSEQ) 895 } 896 } 897 /* 898 * XXX: 0xfa54 and 0xfa5b must be mapped 899 * to JIS0208 area. Therefore we 900 * have to do special treatment. 901 */ 902 if ((dest == 0xfa54) || (dest == 0xfa5b)) { 903 if (dest == 0xfa54) { 904 upper = 0x22; 905 lower = 0x4c; 906 } else { 907 upper = 0x22; 908 lower = 0x68; 909 } 910 KICONV_JA_CNV_JISMS_TO_U2(id, uni, 911 upper, lower); 912 if (uni == KICONV_JA_NODEST) { 913 index = (uint_t)((upper - 0x21) 914 * 94 + (lower - 0x21)); 915 uni = kiconv_ja_jisx0208_to_ucs2 916 [index]; 917 } 918 if (uni == KICONV_JA_REPLACE) 919 rv++; 920 KICONV_JA_PUTU(uni); 921 } else { 922 dest = dest - 0xfa40 - 923 (((dest>>8) - 0xfa) * 0x40); 924 dest = kiconv_ja_sjtoibmext[dest]; 925 if (dest == 0xffff) { 926 KICONV_JA_RETERROR(EILSEQ) 927 } 928 upper = (dest >> 8) & KICONV_JA_CMASK; 929 lower = dest & KICONV_JA_CMASK; 930 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni, 931 upper, lower); 932 if (uni == KICONV_JA_NODEST) { 933 index = (uint_t)((upper - 0x21) 934 * 94 + (lower - 0x21)); 935 uni = kiconv_ja_jisx0212_to_ucs2 936 [index]; 937 } 938 if (uni == KICONV_JA_REPLACE) 939 rv++; 940 KICONV_JA_PUTU(uni); 941 } 942 } else { /* 2nd byte check failed */ 943 KICONV_JA_RETERROR(EILSEQ) 944 } 945 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) { 946 /* 947 * Based on the draft convention of OSF-JVC CDEWG, 948 * characters in this area will be mapped to 949 * "CHIKAN-MOJI." (convertible character) 950 * We use U+FFFD in this case. 951 */ 952 KICONV_JA_NGET(ic2); 953 if (KICONV_JA_ISSJKANJI2(ic2)) { 954 uni = 0xfffd; 955 KICONV_JA_PUTU(uni); 956 } else { /* 2nd byte check failed */ 957 KICONV_JA_RETERROR(EILSEQ) 958 } 959 } else { /* 1st byte check failed */ 960 KICONV_JA_RETERROR(EILSEQ) 961 } 962 963 /* 964 * One character successfully converted so update 965 * values outside of this function's stack. 966 */ 967 *inbuf = (char *)ip; 968 *inbytesleft = ileft; 969 *outbuf = op; 970 *outbytesleft = oleft; 971 } 972 973 ret: 974 return (rv); 975 } 976 977 /* 978 * _kiconv_ja_lookuptbl() 979 * Return the index number if its index-ed number 980 * is the same as dest value. 981 */ 982 static ushort_t 983 _kiconv_ja_lookuptbl(ushort_t dest) 984 { 985 ushort_t tmp; 986 int i; 987 int sz = (sizeof (kiconv_ja_sjtoibmext) / 988 sizeof (kiconv_ja_sjtoibmext[0])); 989 990 for (i = 0; i < sz; i++) { 991 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f); 992 if (tmp == dest) 993 return ((i + 0xfa40 + ((i / 0xc0) * 0x40))); 994 } 995 return (0x3f); 996 } 997 998 static size_t 999 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1000 char **outbuf, size_t *outbytesleft, int *errno) 1001 { 1002 uchar_t ic; 1003 size_t rv = 0; 1004 uint_t ucs4; 1005 ushort_t euc16; 1006 ushort_t dest; 1007 1008 uchar_t *ip; 1009 size_t ileft; 1010 char *op; 1011 size_t oleft; 1012 size_t read_len; 1013 1014 size_t id = ((kiconv_state_t)kcd)->id; 1015 1016 if ((inbuf == NULL) || (*inbuf == NULL)) { 1017 return (0); 1018 } 1019 1020 ip = (uchar_t *)*inbuf; 1021 ileft = *inbytesleft; 1022 op = *outbuf; 1023 oleft = *outbytesleft; 1024 1025 KICONV_JA_CHECK_UTF8_BOM(ip, ileft); 1026 1027 while (ileft != 0) { 1028 KICONV_JA_GETU(&ucs4, 0); 1029 1030 if (ucs4 > 0xffff) { 1031 /* non-BMP */ 1032 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1033 rv++; 1034 goto next; 1035 } 1036 1037 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 1038 if (euc16 == KICONV_JA_NODEST) { 1039 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 1040 } 1041 if (euc16 == KICONV_JA_NODEST) { 1042 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1043 rv++; 1044 goto next; 1045 } 1046 1047 switch (euc16 & 0x8080) { 1048 case 0x0000: /* CS0 */ 1049 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) { 1050 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1051 rv++; 1052 } else { 1053 ic = (uchar_t)euc16; 1054 KICONV_JA_NPUT(ic); 1055 } 1056 break; 1057 case 0x8080: /* CS1 */ 1058 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1059 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]); 1060 /* 1061 * for even number row (Ku), add 0x80 to 1062 * look latter half of kiconv_ja_jistosj2[] array 1063 */ 1064 ic = (uchar_t)((euc16 & KICONV_JA_CMASK) 1065 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1066 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1067 break; 1068 case 0x0080: /* CS2 */ 1069 ic = (uchar_t)euc16; 1070 KICONV_JA_NPUT(ic); 1071 break; 1072 case 0x8000: /* CS3 */ 1073 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1074 if (euc16 == 0xa271) { 1075 /* NUMERO SIGN */ 1076 KICONV_JA_NPUT(0x87); 1077 KICONV_JA_NPUT(0x82); 1078 } else if (ic < 0x75) { /* check if IBM VDC */ 1079 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f); 1080 if (dest == 0xffff) { 1081 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1082 } else { 1083 /* avoid putting NUL ('\0') */ 1084 if (dest > 0xff) { 1085 KICONV_JA_NPUT( 1086 (dest >> 8) & 0xff); 1087 KICONV_JA_NPUT(dest & 0xff); 1088 } else { 1089 KICONV_JA_NPUT(dest & 0xff); 1090 } 1091 } 1092 } else { 1093 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]); 1094 /* 1095 * for even number row (Ku), add 0x80 to 1096 * look latter half of kiconv_ja_jistosj2[] 1097 */ 1098 ic = (ushort_t)((euc16 & KICONV_JA_CMASK) 1099 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1100 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1101 } 1102 break; 1103 } 1104 1105 next: 1106 /* 1107 * One character successfully converted so update 1108 * values outside of this function's stack. 1109 */ 1110 *inbuf = (char *)ip; 1111 *inbytesleft = ileft; 1112 *outbuf = op; 1113 *outbytesleft = oleft; 1114 } 1115 1116 ret: 1117 return (rv); 1118 } 1119 1120 static size_t 1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1122 size_t *outbytesleft, int flag, int *errno, uint8_t id) 1123 { 1124 uint_t uni; /* UTF-32 */ 1125 uint_t index; /* index for table lookup */ 1126 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */ 1127 size_t rv = 0; /* return value of this function */ 1128 1129 uchar_t *ip; 1130 size_t ileft; 1131 char *op; 1132 size_t oleft; 1133 1134 boolean_t do_not_ignore_null; 1135 1136 if ((inbuf == NULL) || (*inbuf == '\0')) { 1137 return (0); 1138 } 1139 1140 ip = (uchar_t *)inbuf; 1141 ileft = *inbytesleft; 1142 op = outbuf; 1143 oleft = *outbytesleft; 1144 1145 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 1146 1147 while (ileft != 0) { 1148 KICONV_JA_NGET(ic1); /* get 1st byte */ 1149 1150 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */ 1151 if (ic1 == '\0' && do_not_ignore_null) { 1152 return (0); 1153 } 1154 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1]; 1155 KICONV_JA_PUTU(uni); 1156 } else if (KICONV_JA_ISSJKANA(ic1)) { 1157 /* JIS X 0201 Kana; 1 byte */ 1158 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)]; 1159 KICONV_JA_PUTU(uni); 1160 } else if (KICONV_JA_ISSJKANJI1(ic1)) { 1161 /* JIS X 0208 or UDC; 2 bytes */ 1162 if (flag & KICONV_REPLACE_INVALID) { 1163 KICONV_JA_NGET_REP_FR_MB(ic2); 1164 } else { 1165 KICONV_JA_NGET(ic2); 1166 } 1167 if (KICONV_JA_ISSJKANJI2(ic2)) { 1168 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 1169 if (ic2 >= 0x9f) { 1170 ic1++; 1171 } 1172 ic2 = kiconv_ja_sjtojis2[ic2]; 1173 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2); 1174 if (uni == KICONV_JA_NODEST) { 1175 index = ((ic1 - 0x21) * 94) 1176 + (ic2 - 0x21); 1177 uni = kiconv_ja_jisx0208_to_ucs2[index]; 1178 } 1179 if (uni == KICONV_JA_REPLACE) 1180 rv++; 1181 KICONV_JA_PUTU(uni); 1182 } else { /* 2nd byte check failed */ 1183 if (flag & KICONV_REPLACE_INVALID) { 1184 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1185 rv++; 1186 } else { 1187 KICONV_JA_RETERROR(EILSEQ) 1188 } 1189 /* NOTREACHED */ 1190 } 1191 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */ 1192 if (flag & KICONV_REPLACE_INVALID) { 1193 KICONV_JA_NGET_REP_FR_MB(ic2); 1194 } else { 1195 KICONV_JA_NGET(ic2); 1196 } 1197 if (KICONV_JA_ISSJKANJI2(ic2)) { 1198 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)]; 1199 if (ic2 >= 0x9f) { 1200 ic1++; 1201 } 1202 index = ((ic1 - 0x21) * 94) 1203 + (kiconv_ja_sjtojis2[ic2] - 0x21); 1204 uni = kiconv_ja_jisx0212_to_ucs2[index]; 1205 if (uni == KICONV_JA_REPLACE) 1206 rv++; 1207 KICONV_JA_PUTU(uni); 1208 } else { /* 2nd byte check failed */ 1209 if (flag & KICONV_REPLACE_INVALID) { 1210 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1211 rv++; 1212 } else { 1213 KICONV_JA_RETERROR(EILSEQ) 1214 } 1215 } 1216 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */ 1217 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */ 1218 /* 1219 * We need a special treatment for each codes. 1220 * By adding some offset number for them, we 1221 * can process them as the same way of that of 1222 * extended IBM chars. 1223 */ 1224 if (flag & KICONV_REPLACE_INVALID) { 1225 KICONV_JA_NGET_REP_FR_MB(ic2); 1226 } else { 1227 KICONV_JA_NGET(ic2); 1228 } 1229 if (KICONV_JA_ISSJKANJI2(ic2)) { 1230 ushort_t dest, upper, lower; 1231 dest = (ic1 << 8) + ic2; 1232 if ((0xed40 <= dest) && (dest <= 0xeffc)) { 1233 KICONV_JA_REMAP_NEC(dest); 1234 if (dest == 0xffff) { 1235 if (flag & 1236 KICONV_REPLACE_INVALID) { 1237 KICONV_JA_PUTU( 1238 KICONV_JA_REPLACE); 1239 rv++; 1240 } else { 1241 KICONV_JA_RETERROR( 1242 EILSEQ) 1243 } 1244 } 1245 } 1246 /* 1247 * XXX: 0xfa54 and 0xfa5b must be mapped 1248 * to JIS0208 area. Therefore we 1249 * have to do special treatment. 1250 */ 1251 if ((dest == 0xfa54) || (dest == 0xfa5b)) { 1252 if (dest == 0xfa54) { 1253 upper = 0x22; 1254 lower = 0x4c; 1255 } else { 1256 upper = 0x22; 1257 lower = 0x68; 1258 } 1259 KICONV_JA_CNV_JISMS_TO_U2(id, uni, 1260 upper, lower); 1261 if (uni == KICONV_JA_NODEST) { 1262 index = (uint_t)((upper - 0x21) 1263 * 94 + (lower - 0x21)); 1264 uni = kiconv_ja_jisx0208_to_ucs2 1265 [index]; 1266 } 1267 if (uni == KICONV_JA_REPLACE) 1268 rv++; 1269 KICONV_JA_PUTU(uni); 1270 } else { 1271 dest = dest - 0xfa40 - 1272 (((dest>>8) - 0xfa) * 0x40); 1273 dest = kiconv_ja_sjtoibmext[dest]; 1274 if (dest == 0xffff) { 1275 if (flag & 1276 KICONV_REPLACE_INVALID) { 1277 KICONV_JA_PUTU( 1278 KICONV_JA_REPLACE); 1279 rv++; 1280 } else { 1281 KICONV_JA_RETERROR( 1282 EILSEQ) 1283 } 1284 } 1285 upper = (dest >> 8) & KICONV_JA_CMASK; 1286 lower = dest & KICONV_JA_CMASK; 1287 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni, 1288 upper, lower); 1289 if (uni == KICONV_JA_NODEST) { 1290 index = (uint_t)((upper - 0x21) 1291 * 94 + (lower - 0x21)); 1292 uni = kiconv_ja_jisx0212_to_ucs2 1293 [index]; 1294 } 1295 if (uni == KICONV_JA_REPLACE) 1296 rv++; 1297 KICONV_JA_PUTU(uni); 1298 } 1299 } else { /* 2nd byte check failed */ 1300 if (flag & KICONV_REPLACE_INVALID) { 1301 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1302 rv++; 1303 } else { 1304 KICONV_JA_RETERROR(EILSEQ) 1305 } 1306 } 1307 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) { 1308 /* 1309 * Based on the draft convention of OSF-JVC CDEWG, 1310 * characters in this area will be mapped to 1311 * "CHIKAN-MOJI." (convertible character) 1312 * We use U+FFFD in this case. 1313 */ 1314 if (flag & KICONV_REPLACE_INVALID) { 1315 KICONV_JA_NGET_REP_FR_MB(ic2); 1316 } else { 1317 KICONV_JA_NGET(ic2); 1318 } 1319 if (KICONV_JA_ISSJKANJI2(ic2)) { 1320 uni = 0xfffd; 1321 KICONV_JA_PUTU(uni); 1322 } else { /* 2nd byte check failed */ 1323 if (flag & KICONV_REPLACE_INVALID) { 1324 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1325 rv++; 1326 } else { 1327 KICONV_JA_RETERROR(EILSEQ) 1328 } 1329 } 1330 } else { /* 1st byte check failed */ 1331 if (flag & KICONV_REPLACE_INVALID) { 1332 KICONV_JA_PUTU(KICONV_JA_REPLACE); 1333 rv++; 1334 } else { 1335 KICONV_JA_RETERROR(EILSEQ) 1336 } 1337 } 1338 1339 next: 1340 /* 1341 * One character successfully converted so update 1342 * values outside of this function's stack. 1343 */ 1344 *inbytesleft = ileft; 1345 *outbytesleft = oleft; 1346 } 1347 1348 ret: 1349 return (rv); 1350 } 1351 1352 static size_t 1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1354 size_t *outbytesleft, int flag, int *errno, uint8_t id) 1355 { 1356 uchar_t ic; 1357 size_t rv = 0; 1358 uint_t ucs4; 1359 ushort_t euc16; 1360 ushort_t dest; 1361 1362 uchar_t *ip; 1363 size_t ileft; 1364 char *op; 1365 size_t oleft; 1366 size_t read_len; 1367 1368 boolean_t do_not_ignore_null; 1369 1370 if ((inbuf == NULL) || (*inbuf == '\0')) { 1371 return (0); 1372 } 1373 1374 ip = (uchar_t *)inbuf; 1375 ileft = *inbytesleft; 1376 op = outbuf; 1377 oleft = *outbytesleft; 1378 1379 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft); 1380 1381 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); 1382 1383 while (ileft != 0) { 1384 KICONV_JA_GETU(&ucs4, flag); 1385 1386 if (ucs4 == 0x0 && do_not_ignore_null) { 1387 return (0); 1388 } 1389 1390 if (ucs4 > 0xffff) { 1391 /* non-BMP */ 1392 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1393 rv++; 1394 goto next; 1395 } 1396 1397 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4); 1398 if (euc16 == KICONV_JA_NODEST) { 1399 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4); 1400 } 1401 if (euc16 == KICONV_JA_NODEST) { 1402 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1403 rv++; 1404 goto next; 1405 } 1406 1407 switch (euc16 & 0x8080) { 1408 case 0x0000: /* CS0 */ 1409 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) { 1410 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1411 rv++; 1412 } else { 1413 ic = (uchar_t)euc16; 1414 KICONV_JA_NPUT(ic); 1415 } 1416 break; 1417 case 0x8080: /* CS1 */ 1418 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1419 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]); 1420 /* 1421 * for even number row (Ku), add 0x80 to 1422 * look latter half of kiconv_ja_jistosj2[] array 1423 */ 1424 ic = (uchar_t)((euc16 & KICONV_JA_CMASK) 1425 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1426 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1427 break; 1428 case 0x0080: /* CS2 */ 1429 ic = (uchar_t)euc16; 1430 KICONV_JA_NPUT(ic); 1431 break; 1432 case 0x8000: /* CS3 */ 1433 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK); 1434 if (euc16 == 0xa271) { 1435 /* NUMERO SIGN */ 1436 KICONV_JA_NPUT(0x87); 1437 KICONV_JA_NPUT(0x82); 1438 } else if (ic < 0x75) { /* check if IBM VDC */ 1439 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f); 1440 if (dest == 0xffff) { 1441 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE); 1442 } else { 1443 /* avoid putting NUL ('\0') */ 1444 if (dest > 0xff) { 1445 KICONV_JA_NPUT( 1446 (dest >> 8) & 0xff); 1447 KICONV_JA_NPUT(dest & 0xff); 1448 } else { 1449 KICONV_JA_NPUT(dest & 0xff); 1450 } 1451 } 1452 } else { 1453 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]); 1454 /* 1455 * for even number row (Ku), add 0x80 to 1456 * look latter half of kiconv_ja_jistosj2[] 1457 */ 1458 ic = (ushort_t)((euc16 & KICONV_JA_CMASK) 1459 + (((ic % 2) == 0) ? 0x80 : 0x00)); 1460 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]); 1461 } 1462 break; 1463 } 1464 1465 next: 1466 /* 1467 * One character successfully converted so update 1468 * values outside of this function's stack. 1469 */ 1470 *inbytesleft = ileft; 1471 *outbytesleft = oleft; 1472 } 1473 1474 ret: 1475 return (rv); 1476 } 1477 1478 static size_t 1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1480 char **outbuf, size_t *outbytesleft, int *errno) 1481 { 1482 if (! kcd || kcd == (void *)-1) { 1483 *errno = EBADF; 1484 return ((size_t)-1); 1485 } 1486 1487 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft, 1488 outbuf, outbytesleft, errno)); 1489 } 1490 1491 static size_t 1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft, 1493 char **outbuf, size_t *outbytesleft, int *errno) 1494 { 1495 if (! kcd || kcd == (void *)-1) { 1496 *errno = EBADF; 1497 return ((size_t)-1); 1498 } 1499 1500 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft, 1501 outbuf, outbytesleft, errno)); 1502 } 1503 1504 static size_t 1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1506 size_t *outbytesleft, int flag, int *errno) 1507 { 1508 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf, 1509 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS)); 1510 } 1511 1512 static size_t 1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf, 1514 size_t *outbytesleft, int flag, int *errno) 1515 { 1516 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf, 1517 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS)); 1518 } 1519 1520 static size_t 1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf, 1522 size_t *outbytesleft, int flag, int *errno) 1523 { 1524 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf, 1525 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932)); 1526 } 1527 1528 static size_t 1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf, 1530 size_t *outbytesleft, int flag, int *errno) 1531 { 1532 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf, 1533 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932)); 1534 } 1535 1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = { 1537 { 1538 "eucjp", "utf-8", open_eucjp, 1539 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp 1540 }, 1541 { 1542 "utf-8", "eucjp", open_eucjp, 1543 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp 1544 }, 1545 { 1546 "eucjpms", "utf-8", open_eucjpms, 1547 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms 1548 }, 1549 { 1550 "utf-8", "eucjpms", open_eucjpms, 1551 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms 1552 }, 1553 { 1554 "sjis", "utf-8", open_sjis, 1555 kiconv_to_sjis, close_ja, kiconvstr_to_sjis 1556 }, 1557 { 1558 "utf-8", "sjis", open_sjis, 1559 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis 1560 }, 1561 { 1562 "cp932", "utf-8", open_cp932, 1563 kiconv_to_sjis, close_ja, kiconvstr_to_cp932 1564 }, 1565 { 1566 "utf-8", "cp932", open_cp932, 1567 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932 1568 } 1569 }; 1570 1571 static char *kiconv_ja_aliases[] = {"932", "shiftjis", "pck"}; 1572 static char *kiconv_ja_canonicals[] = {"cp932", "sjis", "sjis"}; 1573 1574 #define KICONV_JA_MAX_JA_OPS \ 1575 (sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ops_t)) 1576 #define KICONV_JA_MAX_JA_ALIAS \ 1577 (sizeof (kiconv_ja_aliases) / sizeof (char *)) 1578 1579 static kiconv_module_info_t kiconv_ja_info = { 1580 "kiconv_ja", /* module name */ 1581 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */ 1582 kiconv_ja_ops_tbl, /* kiconv_ja ops table */ 1583 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */ 1584 kiconv_ja_aliases, /* kiconv_ja aliases */ 1585 kiconv_ja_canonicals, /* kiconv_ja canonicals */ 1586 0 1587 }; 1588 1589 static struct modlkiconv modlkiconv_ja = { 1590 &mod_kiconvops, 1591 "kiconv module for Japanese", 1592 &kiconv_ja_info 1593 }; 1594 1595 static struct modlinkage modlinkage = { 1596 MODREV_1, 1597 (void *)&modlkiconv_ja, 1598 NULL 1599 }; 1600 1601 int 1602 _init(void) 1603 { 1604 int err; 1605 1606 err = mod_install(&modlinkage); 1607 if (err) 1608 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module"); 1609 1610 return (err); 1611 } 1612 1613 int 1614 _info(struct modinfo *modinfop) 1615 { 1616 return (mod_info(&modlinkage, modinfop)); 1617 } 1618 1619 int 1620 _fini(void) 1621 { 1622 int err; 1623 1624 /* 1625 * If this module is being used, then, we cannot remove the module. 1626 * The following checking will catch pretty much all usual cases. 1627 * 1628 * Any remaining will be catached by the kiconv_unregister_module() 1629 * during mod_remove() at below. 1630 */ 1631 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA)) 1632 return (EBUSY); 1633 1634 err = mod_remove(&modlinkage); 1635 if (err) 1636 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module"); 1637 1638 return (err); 1639 } 1640