1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/debug.h>
34 #include <sys/kmem.h>
35 #include <sys/sunddi.h>
36 #include <sys/byteorder.h>
37 #include <sys/errno.h>
38 #include <sys/euc.h>
39 #include <sys/modctl.h>
40 #include <sys/kiconv.h>
41
42 #include <sys/kiconv_ja.h>
43 #include <sys/kiconv_ja_jis_to_unicode.h>
44 #include <sys/kiconv_ja_unicode_to_jis.h>
45
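/*
 * The following vector gives the total number of bytes in a UTF-8
 * character (or a negative marker such as U8_ILLEGAL_CHAR or
 * U8_OUT_OF_RANGE_CHAR for a byte that cannot start one). The index is
 * the first byte of the character. This is defined in u8_textprep.c.
 */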
51 extern const int8_t u8_number_of_bytes[];
52
/*
 * The following is a vector of bit-masks used to extract the payload bits
 * from the first byte of a UTF-8 character. The index is the number of
 * bytes that remain after the first byte of the character. This is
 * defined in uconv.c.
 */
58 extern const uchar_t u8_masks_tbl[];
59
60 /*
61 * The following two vectors are to provide valid minimum and
62 * maximum values for the 2'nd byte of a multibyte UTF-8 character for
63 * better illegal sequence checking. The index value must be the value of
64 * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
65 */
66 extern const uint8_t u8_valid_min_2nd_byte[];
67 extern const uint8_t u8_valid_max_2nd_byte[];
68
69 static kiconv_ja_euc16_t
kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)70 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
71 {
72 const kiconv_ja_euc16_t *p;
73
74 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
75 return (p[ucs2 & 0xff]);
76
77 return (KICONV_JA_NODEST);
78 }
79
/*
 * Decode one UTF-8 character from the input buffer into a code point.
 *
 *	p	receives the decoded value on success
 *	pip	in/out: address of the pointer to the next input byte
 *	pileft	in/out: address of the count of input bytes still available
 *	errno	error slot handed to the KICONV_JA_* macros
 *
 * On success the return value is the number of bytes consumed and *pip
 * and *pileft are advanced past the character.  When rv is non-zero at
 * the ret label, *pip and *pileft are left untouched.
 *
 * NOTE(review): KICONV_JA_NGET and KICONV_JA_RETERROR are defined outside
 * this file; they appear to use the locals ip, ileft, rv, errno and the
 * ret label by name, and KICONV_JA_RETERROR presumably sets rv to a
 * non-zero failure value before jumping to ret -- confirm against
 * sys/kiconv_ja.h.
 */
static size_t
utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
{
	uint_t l;		/* to be copied to *p on successful return */
	uchar_t ic;		/* current byte */
	uchar_t ic1;		/* 1st byte */
	uchar_t *ip = *pip;	/* next byte to read */
	size_t ileft = *pileft;	/* number of bytes available */
	size_t rv = 0;		/* return value of this function */
	int remaining_bytes;
	int u8_size;

	KICONV_JA_NGET(ic1); /* read 1st byte */

	if (ic1 < 0x80) {
		/* single-byte (ASCII) character: successfully converted */
		*p = (uint_t)ic1;
		goto ret;
	}

	/* Classify the lead byte; negative markers mean it cannot start a char */
	u8_size = u8_number_of_bytes[ic1];
	if (u8_size == U8_ILLEGAL_CHAR) {
		KICONV_JA_RETERROR(EILSEQ)
	} else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
		KICONV_JA_RETERROR(ERANGE)
	}

	remaining_bytes = u8_size - 1;
	if (remaining_bytes != 0) {
		/* keep only the payload bits of the lead byte */
		l = ic1 & u8_masks_tbl[remaining_bytes];

		for (; remaining_bytes > 0; remaining_bytes--) {
			KICONV_JA_NGET(ic);
			/*
			 * ic1 doubles as a "2nd byte not yet checked" flag:
			 * the 2nd byte is checked against the per-lead-byte
			 * range tables, later bytes against 0x80..0xbf.
			 */
			if (ic1 != 0U) {
				if ((ic < u8_valid_min_2nd_byte[ic1]) ||
				    (ic > u8_valid_max_2nd_byte[ic1])) {
					KICONV_JA_RETERROR(EILSEQ)
				}
				ic1 = 0U; /* 2nd byte check done */
			} else {
				if ((ic < 0x80) || (ic > 0xbf)) {
					KICONV_JA_RETERROR(EILSEQ)
				}
			}
			/* append the 6 payload bits of this byte */
			l = (l << 6) | (ic & 0x3f);
		}

		/* successfully converted */
		*p = l;
	} else {
		/* a non-ASCII lead byte with a size of one is rejected */
		KICONV_JA_RETERROR(EILSEQ)
	}

ret:
	if (rv == 0) {
		/*
		 * Update rv, *pip, and *pileft on successful return.
		 */
		rv = *pileft - ileft;
		*pip = ip;
		*pileft = ileft;
	}

	return (rv);
}
145
146 static size_t
utf8_ucs_replace(uint_t * p,uchar_t ** pip,size_t * pileft,size_t * repnum)147 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
148 {
149 uint_t l; /* to be copied to *p on successful return */
150 uchar_t ic; /* current byte */
151 uchar_t ic1; /* 1st byte */
152 uchar_t *ip = *pip; /* next byte to read */
153 size_t ileft = *pileft; /* number of bytes available */
154 size_t rv = 0; /* return value of this function */
155 int remaining_bytes;
156 int u8_size;
157
158 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */
159
160 if (ic1 < 0x80) {
161 /* successfully converted */
162 l = (uint_t)ic1;
163 goto ret;
164 }
165
166 u8_size = u8_number_of_bytes[ic1];
167 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
168 l = KICONV_JA_DEF_SINGLE;
169 (*repnum)++;
170 goto ret;
171 }
172
173 remaining_bytes = u8_size - 1;
174
175 if (remaining_bytes != 0) {
176 l = ic1 & u8_masks_tbl[remaining_bytes];
177
178 for (; remaining_bytes > 0; remaining_bytes--) {
179 KICONV_JA_NGET_REP_TO_MB(ic);
180 if (ic1 != 0U) {
181 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
182 (ic > u8_valid_max_2nd_byte[ic1])) {
183 l = KICONV_JA_DEF_SINGLE;
184 (*repnum)++;
185 ileft -= (remaining_bytes - 1);
186 ip += (remaining_bytes - 1);
187 break;
188 }
189 ic1 = 0U; /* 2nd byte check done */
190 } else {
191 if ((ic < 0x80) || (ic > 0xbf)) {
192 l = KICONV_JA_DEF_SINGLE;
193 (*repnum)++;
194 ileft -= (remaining_bytes - 1);
195 ip += (remaining_bytes - 1);
196 break;
197 }
198 }
199 l = (l << 6) | (ic & 0x3f);
200 }
201 } else {
202 l = KICONV_JA_DEF_SINGLE;
203 (*repnum)++;
204 }
205
206 ret:
207 /* successfully converted */
208 *p = l;
209 rv = *pileft - ileft;
210
211 *pip = ip;
212 *pileft = ileft;
213
214 return (rv);
215 }
216
217 static size_t /* return #bytes read, or -1 */
read_unicode(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno,int flag,size_t * rv)218 read_unicode(
219 uint_t *p, /* point variable to store UTF-32 */
220 uchar_t **pip, /* point pointer to input buf */
221 size_t *pileft, /* point #bytes left in input buf */
222 int *errno, /* point variable to errno */
223 int flag, /* kiconvstr flag */
224 size_t *rv) /* point return valuse */
225 {
226 if (flag & KICONV_REPLACE_INVALID)
227 return (utf8_ucs_replace(p, pip, pileft, rv));
228 else
229 return (utf8_ucs(p, pip, pileft, errno));
230 }
231
/*
 * Encode one code point as UTF-8 into the output buffer.
 *
 * Values up to 0x7f, 0x7ff, 0xffff and 0x10ffff are written as 1, 2, 3
 * and 4 bytes respectively.  Surrogate values (0xd800..0xdfff) and values
 * above 0x10ffff are rejected through KICONV_JA_RETERROR(EILSEQ).
 *
 * Returns the number of bytes written.  *pop and *poleft are updated only
 * when rv is not (size_t)-1 at the ret label.
 *
 * NOTE(review): KICONV_JA_NPUT and KICONV_JA_RETERROR are defined outside
 * this file; they appear to use the locals op, oleft, rv, errno and the
 * ret label by name, and presumably leave rv at (size_t)-1 on failure
 * (including a full output buffer) -- confirm against sys/kiconv_ja.h.
 */
static size_t
write_unicode(
	uint_t u32,		/* UTF-32 to write */
	char **pop,		/* point pointer to output buf */
	size_t *poleft,		/* point #bytes left in output buf */
	int *errno)		/* point variable to errno */
{
	char *op = *pop;
	size_t oleft = *poleft;
	size_t rv = 0;		/* return value */

	if (u32 <= 0x7f) {
		/* 1 byte: 0xxxxxxx */
		KICONV_JA_NPUT((uchar_t)(u32));
		rv = 1;
	} else if (u32 <= 0x7ff) {
		/* 2 bytes: 110xxxxx 10xxxxxx */
		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
		rv = 2;
	} else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
		/* surrogate values are not encodable */
		KICONV_JA_RETERROR(EILSEQ)
	} else if (u32 <= 0xffff) {
		/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
		rv = 3;
	} else if (u32 <= 0x10ffff) {
		/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
		rv = 4;
	} else {
		/* beyond the last code point that is accepted here */
		KICONV_JA_RETERROR(EILSEQ)
	}

ret:
	if (rv != (size_t)-1) {
		/* update *pop and *poleft only on successful return */
		*pop = op;
		*poleft = oleft;
	}

	return (rv);
}
276
277 static void *
_kiconv_ja_open_unicode(uint8_t id)278 _kiconv_ja_open_unicode(uint8_t id)
279 {
280 kiconv_state_t kcd;
281
282 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
283 KM_SLEEP);
284 kcd->id = id;
285 kcd->bom_processed = 0;
286 return ((void *)kcd);
287 }
288
289 static void *
open_eucjp(void)290 open_eucjp(void)
291 {
292 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
293 }
294
295 static void *
open_eucjpms(void)296 open_eucjpms(void)
297 {
298 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
299 }
300
301 static void *
open_sjis(void)302 open_sjis(void)
303 {
304 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
305 }
306
307 static void *
open_cp932(void)308 open_cp932(void)
309 {
310 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
311 }
312
313 int
close_ja(void * kcd)314 close_ja(void *kcd)
315 {
316 if (! kcd || kcd == (void *)-1)
317 return (EBADF);
318
319 kmem_free(kcd, sizeof (kiconv_state_data_t));
320
321 return (0);
322 }
323
/*
 * Convert EUC-JP input to Unicode output, one character per loop
 * iteration.  The first byte selects the code set:
 *	ASCII			1 byte
 *	CS1 (JIS X 0208/UDC)	2 bytes
 *	SS2 + JIS X 0201 Kana	2 bytes
 *	SS3 + JIS X 0212/UDC	3 bytes
 *	C1 control		1 byte
 * Anything else, or a failed trailing-byte check, is reported through
 * KICONV_JA_RETERROR(EILSEQ).
 *
 * The table id stored in kcd is handed to the KICONV_JA_CNV_* macros;
 * when they yield KICONV_JA_NODEST the generic JIS X 0208 / 0212 tables
 * are consulted instead.  rv counts the characters whose result was
 * KICONV_JA_REPLACE.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * updated after every completely converted character, so after a jump
 * to ret they describe the state after the last complete character.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU and KICONV_JA_RETERROR are
 * defined outside this file and appear to use the locals ip, ileft, op,
 * oleft, rv, errno and the ret label by name; KICONV_JA_PUTU presumably
 * writes the value as UTF-8 -- confirm against sys/kiconv_ja.h.
 */
static size_t
_do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t u32;		/* UTF-32 */
	uint_t index;		/* index for table lookup */
	uchar_t ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
	size_t rv = 0;		/* return value of this function */

	uchar_t *ip;
	size_t ileft;
	char *op;
	size_t oleft;
	size_t id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; callers' values are updated per character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1); /* get 1st byte */

		if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(u32);
		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
				ic1 &= KICONV_JA_CMASK;
				ic2 &= KICONV_JA_CMASK;
				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
				if (u32 == KICONV_JA_NODEST) {
					/* fall back to the 94x94 0208 table */
					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
					u32 = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (u32 == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
				index = (ic2 - 0xa1);
				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
				KICONV_JA_NGET(ic3);
				if (KICONV_JA_ISCS3(ic3)) {
					/* 3rd byte check passed */
					ic2 &= KICONV_JA_CMASK;
					ic3 &= KICONV_JA_CMASK;
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
					    ic2, ic3);
					if (u32 == KICONV_JA_NODEST) {
						/* fall back to 0212 table */
						index = ((ic2 - 0x21) * 94 +
						    (ic3 - 0x21));
						u32 = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (u32 == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(u32);
				} else { /* 3rd byte check failed */
					KICONV_JA_RETERROR(EILSEQ)
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
			/* C1 control; 1 byte, passed through unchanged */
			u32 = ic1;
			KICONV_JA_PUTU(u32);
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
425
/*
 * Convert Unicode input to EUC-JP output, one character per loop
 * iteration.
 *
 * Each character is fetched with KICONV_JA_GETU and mapped to a 16-bit
 * EUC value, first through KICONV_JA_CNV_U2_TO_EUCJPMS (which receives
 * the table id stored in kcd) and then, if that yields KICONV_JA_NODEST,
 * through kiconv_ja_ucs2_to_euc16().  Code points above 0xffff and
 * characters with no mapping are written as KICONV_JA_DEF_SINGLE and
 * counted in rv.  The 0x8080 bits of the EUC value select the code set
 * and therefore the byte layout that is written.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * updated after every converted character.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): KICONV_JA_CHECK_UTF8_BOM, KICONV_JA_GETU, KICONV_JA_NPUT
 * and KICONV_JA_CNV_U2_TO_EUCJPMS are defined outside this file and
 * appear to use the locals kcd, ip, ileft, op, oleft, rv, errno and the
 * ret label by name.  read_len is not referenced directly here and is
 * presumably used by KICONV_JA_GETU -- confirm against sys/kiconv_ja.h.
 */
static size_t
_do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t ic;
	size_t rv = 0;
	uint_t ucs4;
	ushort_t euc16;

	uchar_t *ip;
	size_t ileft;
	char *op;
	size_t oleft;
	size_t read_len;

	size_t id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; callers' values are updated per character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			/* no mapping in either table */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* the top bit of each byte of euc16 selects the code set */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8080:	/* CS1 */
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			ic = (uchar_t)(euc16 & 0xff);
			KICONV_JA_NPUT(ic);
			break;
		case 0x0080:	/* CS2 */
			KICONV_JA_NPUT(SS2);
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			KICONV_JA_NPUT(SS3);
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			/* low byte is stored without its MSB; restore it */
			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
			break;
		}
next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
512
513 static size_t
_do_kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)514 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
515 size_t *outbytesleft, int flag, int *errno, uint8_t id)
516 {
517 uint_t u32; /* UTF-32 */
518 uint_t index; /* index for table lookup */
519 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
520 size_t rv = 0; /* return value of this function */
521
522 uchar_t *ip;
523 size_t ileft;
524 char *op;
525 size_t oleft;
526
527 boolean_t do_not_ignore_null;
528
529 if ((inbuf == NULL) || (*inbuf == NULL)) {
530 return (0);
531 }
532
533 ip = (uchar_t *)inbuf;
534 ileft = *inbytesleft;
535 op = outbuf;
536 oleft = *outbytesleft;
537
538 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
539
540 while (ileft != 0) {
541 KICONV_JA_NGET(ic1); /* get 1st byte */
542
543 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
544 if (ic1 == '\0' && do_not_ignore_null) {
545 return (0);
546 }
547 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
548 KICONV_JA_PUTU(u32);
549 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
550 if (flag & KICONV_REPLACE_INVALID) {
551 KICONV_JA_NGET_REP_FR_MB(ic2);
552 } else {
553 KICONV_JA_NGET(ic2);
554 }
555 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
556 ic1 &= KICONV_JA_CMASK;
557 ic2 &= KICONV_JA_CMASK;
558 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
559 if (u32 == KICONV_JA_NODEST) {
560 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
561 u32 = kiconv_ja_jisx0208_to_ucs2[index];
562 }
563 if (u32 == KICONV_JA_REPLACE)
564 rv++;
565 KICONV_JA_PUTU(u32);
566 } else { /* 2nd byte check failed */
567 if (flag & KICONV_REPLACE_INVALID) {
568 KICONV_JA_PUTU(KICONV_JA_REPLACE);
569 rv++;
570 } else {
571 KICONV_JA_RETERROR(EILSEQ)
572 }
573 }
574 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
575 if (flag & KICONV_REPLACE_INVALID) {
576 KICONV_JA_NGET_REP_FR_MB(ic2);
577 } else {
578 KICONV_JA_NGET(ic2);
579 }
580 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
581 index = (ic2 - 0xa1);
582 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
583 KICONV_JA_PUTU(u32);
584 } else { /* 2nd byte check failed */
585 if (flag & KICONV_REPLACE_INVALID) {
586 KICONV_JA_PUTU(KICONV_JA_REPLACE);
587 rv++;
588 } else {
589 KICONV_JA_RETERROR(EILSEQ)
590 }
591 }
592 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
593 if (flag & KICONV_REPLACE_INVALID) {
594 KICONV_JA_NGET_REP_FR_MB(ic2);
595 } else {
596 KICONV_JA_NGET(ic2);
597 }
598 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
599 if (flag & KICONV_REPLACE_INVALID) {
600 KICONV_JA_NGET_REP_FR_MB(ic3);
601 } else {
602 KICONV_JA_NGET(ic3);
603 }
604 if (KICONV_JA_ISCS3(ic3)) {
605 /* 3rd byte check passed */
606 ic2 &= KICONV_JA_CMASK;
607 ic3 &= KICONV_JA_CMASK;
608 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
609 ic2, ic3);
610 if (u32 == KICONV_JA_NODEST) {
611 index = ((ic2 - 0x21) * 94 +
612 (ic3 - 0x21));
613 u32 = kiconv_ja_jisx0212_to_ucs2
614 [index];
615 }
616 if (u32 == KICONV_JA_REPLACE)
617 rv++;
618 KICONV_JA_PUTU(u32);
619 } else { /* 3rd byte check failed */
620 if (flag & KICONV_REPLACE_INVALID) {
621 KICONV_JA_PUTU(
622 KICONV_JA_REPLACE);
623 rv++;
624 } else {
625 KICONV_JA_RETERROR(EILSEQ)
626 }
627 }
628 } else { /* 2nd byte check failed */
629 if (flag & KICONV_REPLACE_INVALID) {
630 KICONV_JA_PUTU(KICONV_JA_REPLACE);
631 rv++;
632 } else {
633 KICONV_JA_RETERROR(EILSEQ)
634 }
635 }
636 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
637 /* C1 control; 1 byte */
638 u32 = ic1;
639 KICONV_JA_PUTU(u32);
640 } else { /* 1st byte check failed */
641 if (flag & KICONV_REPLACE_INVALID) {
642 KICONV_JA_PUTU(KICONV_JA_REPLACE);
643 rv++;
644 } else {
645 KICONV_JA_RETERROR(EILSEQ)
646 }
647 }
648
649 next:
650 /*
651 * One character successfully converted so update
652 * values outside of this function's stack.
653 */
654 *inbytesleft = ileft;
655 *outbytesleft = oleft;
656 }
657
658 ret:
659 return (rv);
660 }
661
662 static size_t
_do_kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)663 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
664 size_t *outbytesleft, int flag, int *errno, uint8_t id)
665 {
666 uchar_t ic;
667 size_t rv = 0;
668 uint_t ucs4;
669 ushort_t euc16;
670
671 uchar_t *ip;
672 size_t ileft;
673 char *op;
674 size_t oleft;
675 size_t read_len;
676
677 boolean_t do_not_ignore_null;
678
679 if ((inbuf == NULL) || (*inbuf == NULL)) {
680 return (0);
681 }
682
683 ip = (uchar_t *)inbuf;
684 ileft = *inbytesleft;
685 op = outbuf;
686 oleft = *outbytesleft;
687
688 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
689
690 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
691
692 while (ileft != 0) {
693 KICONV_JA_GETU(&ucs4, flag);
694
695 if (ucs4 == 0x0 && do_not_ignore_null) {
696 return (0);
697 }
698
699 if (ucs4 > 0xffff) {
700 /* non-BMP */
701 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
702 rv++;
703 goto next;
704 }
705
706 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
707 if (euc16 == KICONV_JA_NODEST) {
708 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
709 }
710 if (euc16 == KICONV_JA_NODEST) {
711 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
712 rv++;
713 goto next;
714 }
715
716 switch (euc16 & 0x8080) {
717 case 0x0000: /* CS0 */
718 ic = (uchar_t)euc16;
719 KICONV_JA_NPUT(ic);
720 break;
721 case 0x8080: /* CS1 */
722 ic = (uchar_t)((euc16 >> 8) & 0xff);
723 KICONV_JA_NPUT(ic);
724 ic = (uchar_t)(euc16 & 0xff);
725 KICONV_JA_NPUT(ic);
726 break;
727 case 0x0080: /* CS2 */
728 KICONV_JA_NPUT(SS2);
729 ic = (uchar_t)euc16;
730 KICONV_JA_NPUT(ic);
731 break;
732 case 0x8000: /* CS3 */
733 KICONV_JA_NPUT(SS3);
734 ic = (uchar_t)((euc16 >> 8) & 0xff);
735 KICONV_JA_NPUT(ic);
736 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
737 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
738 break;
739 }
740 next:
741 /*
742 * One character successfully converted so update
743 * values outside of this function's stack.
744 */
745 *inbytesleft = ileft;
746 *outbytesleft = oleft;
747 }
748
749 ret:
750 return (rv);
751 }
752
753 static size_t
kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)754 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
755 char **outbuf, size_t *outbytesleft, int *errno)
756 {
757 if (! kcd || kcd == (void *)-1) {
758 *errno = EBADF;
759 return ((size_t)-1);
760 }
761
762 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
763 outbuf, outbytesleft, errno));
764 }
765
766 static size_t
kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)767 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
768 char **outbuf, size_t *outbytesleft, int *errno)
769 {
770 if (! kcd || kcd == (void *)-1) {
771 *errno = EBADF;
772 return ((size_t)-1);
773 }
774
775 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
776 outbuf, outbytesleft, errno));
777 }
778
779 static size_t
kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)780 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
781 size_t *outbytesleft, int flag, int *errno)
782 {
783 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
784 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
785 }
786
787 static size_t
kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)788 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
789 size_t *outbytesleft, int flag, int *errno)
790 {
791 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
792 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
793 }
794
795 static size_t
kiconvstr_fr_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)796 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
797 size_t *outbytesleft, int flag, int *errno)
798 {
799 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
800 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
801 }
802
803 static size_t
kiconvstr_to_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)804 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
805 size_t *outbytesleft, int flag, int *errno)
806 {
807 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
808 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
809 }
810
/*
 * Convert Shift_JIS input to Unicode output, one character per loop
 * iteration.  The first byte selects the handling:
 *	ASCII				1 byte
 *	JIS X 0201 Kana			1 byte
 *	JIS X 0208 / UDC		2 bytes
 *	VDC (looked up in the 0212 table)	2 bytes
 *	Extended IBM and NEC/IBM areas	2 bytes
 *	0xeb..0xec			2 bytes, always written as U+FFFD
 * Anything else, or a failed 2nd-byte check, is reported through
 * KICONV_JA_RETERROR(EILSEQ).
 *
 * The table id stored in kcd is handed to the KICONV_JA_CNV_* macros;
 * when they yield KICONV_JA_NODEST the generic JIS X 0208 / 0212 tables
 * are consulted instead.  rv counts the characters whose result was
 * KICONV_JA_REPLACE.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * updated after every completely converted character.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU, KICONV_JA_REMAP_NEC and
 * KICONV_JA_RETERROR are defined outside this file and appear to use the
 * locals ip, ileft, op, oleft, rv, errno and the ret label by name --
 * confirm against sys/kiconv_ja.h.
 */
static size_t
_do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t uni;		/* UTF-32 */
	uint_t index;		/* index for table lookup */
	uchar_t ic1, ic2;	/* 1st and 2nd bytes of a char */
	size_t rv = 0;		/* return value of this function */

	uchar_t *ip;
	size_t ileft;
	char *op;
	size_t oleft;
	size_t id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; callers' values are updated per character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1); /* get 1st byte */

		if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/*
				 * Turn the SJIS byte pair into a JIS row/cell
				 * pair; a 2nd byte of 0x9f or above selects
				 * the following row.
				 */
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				ic2 = kiconv_ja_sjtojis2[ic2];
				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
				if (uni == KICONV_JA_NODEST) {
					index = ((ic1 - 0x21) * 94)
					    + (ic2 - 0x21);
					uni = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
				/* NOTREACHED */
			}
		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				/* this area is looked up in the 0212 table */
				index = ((ic1 - 0x21) * 94)
				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
				uni = kiconv_ja_jisx0212_to_ucs2[index];
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
			/*
			 * We need a special treatment for each codes.
			 * By adding some offset number for them, we
			 * can process them as the same way of that of
			 * extended IBM chars.
			 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				ushort_t dest, upper, lower;
				dest = (ic1 << 8) + ic2;
				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
					KICONV_JA_REMAP_NEC(dest);
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
				}
				/*
				 * XXX: 0xfa54 and 0xfa5b must be mapped
				 *	to JIS0208 area. Therefore we
				 *	have to do special treatment.
				 */
				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
					if (dest == 0xfa54) {
						upper = 0x22;
						lower = 0x4c;
					} else {
						upper = 0x22;
						lower = 0x68;
					}
					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0208_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				} else {
					/*
					 * Turn the code into an index into
					 * kiconv_ja_sjtoibmext; an entry of
					 * 0xffff is rejected as illegal.
					 */
					dest = dest - 0xfa40 -
					    (((dest>>8) - 0xfa) * 0x40);
					dest = kiconv_ja_sjtoibmext[dest];
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
					upper = (dest >> 8) & KICONV_JA_CMASK;
					lower = dest & KICONV_JA_CMASK;
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
		/*
		 * Based on the draft convention of OSF-JVC CDEWG,
		 * characters in this area will be mapped to
		 * "CHIKAN-MOJI." (convertible character)
		 * We use U+FFFD in this case.
		 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				uni = 0xfffd;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
978
979 /*
980 * _kiconv_ja_lookuptbl()
981 * Return the index number if its index-ed number
982 * is the same as dest value.
983 */
984 static ushort_t
_kiconv_ja_lookuptbl(ushort_t dest)985 _kiconv_ja_lookuptbl(ushort_t dest)
986 {
987 ushort_t tmp;
988 int i;
989 int sz = (sizeof (kiconv_ja_sjtoibmext) /
990 sizeof (kiconv_ja_sjtoibmext[0]));
991
992 for (i = 0; i < sz; i++) {
993 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
994 if (tmp == dest)
995 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
996 }
997 return (0x3f);
998 }
999
/*
 * Convert Unicode input to Shift_JIS output, one character per loop
 * iteration.
 *
 * Each character is fetched with KICONV_JA_GETU and mapped to a 16-bit
 * EUC value, first through KICONV_JA_CNV_U2_TO_EUCJPMS (which receives
 * the table id stored in kcd) and then, if that yields KICONV_JA_NODEST,
 * through kiconv_ja_ucs2_to_euc16().  The 0x8080 bits of the EUC value
 * select the code set, and each code set is turned into Shift_JIS bytes
 * with its own lookup tables.  Code points above 0xffff, characters with
 * no mapping, and CS0 values matching KICONV_JA_ISC1CTRL are written as
 * KICONV_JA_DEF_SINGLE and counted in rv.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * updated after every converted character.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): KICONV_JA_CHECK_UTF8_BOM, KICONV_JA_GETU, KICONV_JA_NPUT
 * and KICONV_JA_CNV_U2_TO_EUCJPMS are defined outside this file and
 * appear to use the locals kcd, ip, ileft, op, oleft, rv, errno and the
 * ret label by name.  read_len is not referenced directly here and is
 * presumably used by KICONV_JA_GETU -- confirm against sys/kiconv_ja.h.
 */
static size_t
_do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t ic;
	size_t rv = 0;
	uint_t ucs4;
	ushort_t euc16;
	ushort_t dest;

	uchar_t *ip;
	size_t ileft;
	char *op;
	size_t oleft;
	size_t read_len;

	size_t id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; callers' values are updated per character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			/* no mapping in either table */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* the top bit of each byte of euc16 selects the code set */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				rv++;
			} else {
				ic = (uchar_t)euc16;
				KICONV_JA_NPUT(ic);
			}
			break;
		case 0x8080:	/* CS1 */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
			/*
			 * for even number row (Ku), add 0x80 to
			 * look latter half of kiconv_ja_jistosj2[] array
			 */
			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
			    + (((ic % 2) == 0) ? 0x80 : 0x00));
			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			break;
		case 0x0080:	/* CS2 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			if (euc16 == 0xa271) {
				/* NUMERO SIGN */
				KICONV_JA_NPUT(0x87);
				KICONV_JA_NPUT(0x82);
			} else if (ic < 0x75) { /* check if IBM VDC */
				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
				/*
				 * NOTE(review): _kiconv_ja_lookuptbl() in
				 * this file returns 0x3f, not 0xffff, when
				 * nothing matches, so the 0xffff test below
				 * does not catch a miss; a miss is written
				 * by the single-byte branch as 0x3f and rv
				 * is not incremented -- confirm intended.
				 */
				if (dest == 0xffff) {
					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				} else {
					/* avoid putting NUL ('\0') */
					if (dest > 0xff) {
						KICONV_JA_NPUT(
						    (dest >> 8) & 0xff);
						KICONV_JA_NPUT(dest & 0xff);
					} else {
						KICONV_JA_NPUT(dest & 0xff);
					}
				}
			} else {
				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
				/*
				 * for even number row (Ku), add 0x80 to
				 * look latter half of kiconv_ja_jistosj2[]
				 */
				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
				    + (((ic % 2) == 0) ? 0x80 : 0x00));
				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			}
			break;
		}

next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
1121
1122 static size_t
_do_kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1123 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1124 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1125 {
1126 uint_t uni; /* UTF-32 */
1127 uint_t index; /* index for table lookup */
1128 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
1129 size_t rv = 0; /* return value of this function */
1130
1131 uchar_t *ip;
1132 size_t ileft;
1133 char *op;
1134 size_t oleft;
1135
1136 boolean_t do_not_ignore_null;
1137
1138 if ((inbuf == NULL) || (*inbuf == NULL)) {
1139 return (0);
1140 }
1141
1142 ip = (uchar_t *)inbuf;
1143 ileft = *inbytesleft;
1144 op = outbuf;
1145 oleft = *outbytesleft;
1146
1147 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1148
1149 while (ileft != 0) {
1150 KICONV_JA_NGET(ic1); /* get 1st byte */
1151
1152 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
1153 if (ic1 == '\0' && do_not_ignore_null) {
1154 return (0);
1155 }
1156 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1157 KICONV_JA_PUTU(uni);
1158 } else if (KICONV_JA_ISSJKANA(ic1)) {
1159 /* JIS X 0201 Kana; 1 byte */
1160 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1161 KICONV_JA_PUTU(uni);
1162 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1163 /* JIS X 0208 or UDC; 2 bytes */
1164 if (flag & KICONV_REPLACE_INVALID) {
1165 KICONV_JA_NGET_REP_FR_MB(ic2);
1166 } else {
1167 KICONV_JA_NGET(ic2);
1168 }
1169 if (KICONV_JA_ISSJKANJI2(ic2)) {
1170 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1171 if (ic2 >= 0x9f) {
1172 ic1++;
1173 }
1174 ic2 = kiconv_ja_sjtojis2[ic2];
1175 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1176 if (uni == KICONV_JA_NODEST) {
1177 index = ((ic1 - 0x21) * 94)
1178 + (ic2 - 0x21);
1179 uni = kiconv_ja_jisx0208_to_ucs2[index];
1180 }
1181 if (uni == KICONV_JA_REPLACE)
1182 rv++;
1183 KICONV_JA_PUTU(uni);
1184 } else { /* 2nd byte check failed */
1185 if (flag & KICONV_REPLACE_INVALID) {
1186 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1187 rv++;
1188 } else {
1189 KICONV_JA_RETERROR(EILSEQ)
1190 }
1191 /* NOTREACHED */
1192 }
1193 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1194 if (flag & KICONV_REPLACE_INVALID) {
1195 KICONV_JA_NGET_REP_FR_MB(ic2);
1196 } else {
1197 KICONV_JA_NGET(ic2);
1198 }
1199 if (KICONV_JA_ISSJKANJI2(ic2)) {
1200 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1201 if (ic2 >= 0x9f) {
1202 ic1++;
1203 }
1204 index = ((ic1 - 0x21) * 94)
1205 + (kiconv_ja_sjtojis2[ic2] - 0x21);
1206 uni = kiconv_ja_jisx0212_to_ucs2[index];
1207 if (uni == KICONV_JA_REPLACE)
1208 rv++;
1209 KICONV_JA_PUTU(uni);
1210 } else { /* 2nd byte check failed */
1211 if (flag & KICONV_REPLACE_INVALID) {
1212 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1213 rv++;
1214 } else {
1215 KICONV_JA_RETERROR(EILSEQ)
1216 }
1217 }
1218 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1219 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1220 /*
1221 * We need a special treatment for each codes.
1222 * By adding some offset number for them, we
1223 * can process them as the same way of that of
1224 * extended IBM chars.
1225 */
1226 if (flag & KICONV_REPLACE_INVALID) {
1227 KICONV_JA_NGET_REP_FR_MB(ic2);
1228 } else {
1229 KICONV_JA_NGET(ic2);
1230 }
1231 if (KICONV_JA_ISSJKANJI2(ic2)) {
1232 ushort_t dest, upper, lower;
1233 dest = (ic1 << 8) + ic2;
1234 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1235 KICONV_JA_REMAP_NEC(dest);
1236 if (dest == 0xffff) {
1237 if (flag &
1238 KICONV_REPLACE_INVALID) {
1239 KICONV_JA_PUTU(
1240 KICONV_JA_REPLACE);
1241 rv++;
1242 } else {
1243 KICONV_JA_RETERROR(
1244 EILSEQ)
1245 }
1246 }
1247 }
1248 /*
1249 * XXX: 0xfa54 and 0xfa5b must be mapped
1250 * to JIS0208 area. Therefore we
1251 * have to do special treatment.
1252 */
1253 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1254 if (dest == 0xfa54) {
1255 upper = 0x22;
1256 lower = 0x4c;
1257 } else {
1258 upper = 0x22;
1259 lower = 0x68;
1260 }
1261 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1262 upper, lower);
1263 if (uni == KICONV_JA_NODEST) {
1264 index = (uint_t)((upper - 0x21)
1265 * 94 + (lower - 0x21));
1266 uni = kiconv_ja_jisx0208_to_ucs2
1267 [index];
1268 }
1269 if (uni == KICONV_JA_REPLACE)
1270 rv++;
1271 KICONV_JA_PUTU(uni);
1272 } else {
1273 dest = dest - 0xfa40 -
1274 (((dest>>8) - 0xfa) * 0x40);
1275 dest = kiconv_ja_sjtoibmext[dest];
1276 if (dest == 0xffff) {
1277 if (flag &
1278 KICONV_REPLACE_INVALID) {
1279 KICONV_JA_PUTU(
1280 KICONV_JA_REPLACE);
1281 rv++;
1282 } else {
1283 KICONV_JA_RETERROR(
1284 EILSEQ)
1285 }
1286 }
1287 upper = (dest >> 8) & KICONV_JA_CMASK;
1288 lower = dest & KICONV_JA_CMASK;
1289 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1290 upper, lower);
1291 if (uni == KICONV_JA_NODEST) {
1292 index = (uint_t)((upper - 0x21)
1293 * 94 + (lower - 0x21));
1294 uni = kiconv_ja_jisx0212_to_ucs2
1295 [index];
1296 }
1297 if (uni == KICONV_JA_REPLACE)
1298 rv++;
1299 KICONV_JA_PUTU(uni);
1300 }
1301 } else { /* 2nd byte check failed */
1302 if (flag & KICONV_REPLACE_INVALID) {
1303 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1304 rv++;
1305 } else {
1306 KICONV_JA_RETERROR(EILSEQ)
1307 }
1308 }
1309 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1310 /*
1311 * Based on the draft convention of OSF-JVC CDEWG,
1312 * characters in this area will be mapped to
1313 * "CHIKAN-MOJI." (convertible character)
1314 * We use U+FFFD in this case.
1315 */
1316 if (flag & KICONV_REPLACE_INVALID) {
1317 KICONV_JA_NGET_REP_FR_MB(ic2);
1318 } else {
1319 KICONV_JA_NGET(ic2);
1320 }
1321 if (KICONV_JA_ISSJKANJI2(ic2)) {
1322 uni = 0xfffd;
1323 KICONV_JA_PUTU(uni);
1324 } else { /* 2nd byte check failed */
1325 if (flag & KICONV_REPLACE_INVALID) {
1326 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1327 rv++;
1328 } else {
1329 KICONV_JA_RETERROR(EILSEQ)
1330 }
1331 }
1332 } else { /* 1st byte check failed */
1333 if (flag & KICONV_REPLACE_INVALID) {
1334 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1335 rv++;
1336 } else {
1337 KICONV_JA_RETERROR(EILSEQ)
1338 }
1339 }
1340
1341 next:
1342 /*
1343 * One character successfully converted so update
1344 * values outside of this function's stack.
1345 */
1346 *inbytesleft = ileft;
1347 *outbytesleft = oleft;
1348 }
1349
1350 ret:
1351 return (rv);
1352 }
1353
1354 static size_t
_do_kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1355 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1356 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1357 {
1358 uchar_t ic;
1359 size_t rv = 0;
1360 uint_t ucs4;
1361 ushort_t euc16;
1362 ushort_t dest;
1363
1364 uchar_t *ip;
1365 size_t ileft;
1366 char *op;
1367 size_t oleft;
1368 size_t read_len;
1369
1370 boolean_t do_not_ignore_null;
1371
1372 if ((inbuf == NULL) || (*inbuf == NULL)) {
1373 return (0);
1374 }
1375
1376 ip = (uchar_t *)inbuf;
1377 ileft = *inbytesleft;
1378 op = outbuf;
1379 oleft = *outbytesleft;
1380
1381 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1382
1383 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1384
1385 while (ileft != 0) {
1386 KICONV_JA_GETU(&ucs4, flag);
1387
1388 if (ucs4 == 0x0 && do_not_ignore_null) {
1389 return (0);
1390 }
1391
1392 if (ucs4 > 0xffff) {
1393 /* non-BMP */
1394 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1395 rv++;
1396 goto next;
1397 }
1398
1399 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1400 if (euc16 == KICONV_JA_NODEST) {
1401 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1402 }
1403 if (euc16 == KICONV_JA_NODEST) {
1404 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1405 rv++;
1406 goto next;
1407 }
1408
1409 switch (euc16 & 0x8080) {
1410 case 0x0000: /* CS0 */
1411 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1412 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1413 rv++;
1414 } else {
1415 ic = (uchar_t)euc16;
1416 KICONV_JA_NPUT(ic);
1417 }
1418 break;
1419 case 0x8080: /* CS1 */
1420 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1421 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1422 /*
1423 * for even number row (Ku), add 0x80 to
1424 * look latter half of kiconv_ja_jistosj2[] array
1425 */
1426 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1427 + (((ic % 2) == 0) ? 0x80 : 0x00));
1428 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1429 break;
1430 case 0x0080: /* CS2 */
1431 ic = (uchar_t)euc16;
1432 KICONV_JA_NPUT(ic);
1433 break;
1434 case 0x8000: /* CS3 */
1435 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1436 if (euc16 == 0xa271) {
1437 /* NUMERO SIGN */
1438 KICONV_JA_NPUT(0x87);
1439 KICONV_JA_NPUT(0x82);
1440 } else if (ic < 0x75) { /* check if IBM VDC */
1441 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1442 if (dest == 0xffff) {
1443 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1444 } else {
1445 /* avoid putting NUL ('\0') */
1446 if (dest > 0xff) {
1447 KICONV_JA_NPUT(
1448 (dest >> 8) & 0xff);
1449 KICONV_JA_NPUT(dest & 0xff);
1450 } else {
1451 KICONV_JA_NPUT(dest & 0xff);
1452 }
1453 }
1454 } else {
1455 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1456 /*
1457 * for even number row (Ku), add 0x80 to
1458 * look latter half of kiconv_ja_jistosj2[]
1459 */
1460 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1461 + (((ic % 2) == 0) ? 0x80 : 0x00));
1462 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1463 }
1464 break;
1465 }
1466
1467 next:
1468 /*
1469 * One character successfully converted so update
1470 * values outside of this function's stack.
1471 */
1472 *inbytesleft = ileft;
1473 *outbytesleft = oleft;
1474 }
1475
1476 ret:
1477 return (rv);
1478 }
1479
1480 static size_t
kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1481 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1482 char **outbuf, size_t *outbytesleft, int *errno)
1483 {
1484 if (! kcd || kcd == (void *)-1) {
1485 *errno = EBADF;
1486 return ((size_t)-1);
1487 }
1488
1489 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1490 outbuf, outbytesleft, errno));
1491 }
1492
1493 static size_t
kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1494 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1495 char **outbuf, size_t *outbytesleft, int *errno)
1496 {
1497 if (! kcd || kcd == (void *)-1) {
1498 *errno = EBADF;
1499 return ((size_t)-1);
1500 }
1501
1502 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1503 outbuf, outbytesleft, errno));
1504 }
1505
1506 static size_t
kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1507 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1508 size_t *outbytesleft, int flag, int *errno)
1509 {
1510 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1511 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512 }
1513
1514 static size_t
kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1515 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1516 size_t *outbytesleft, int flag, int *errno)
1517 {
1518 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1519 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520 }
1521
1522 static size_t
kiconvstr_fr_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1523 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1524 size_t *outbytesleft, int flag, int *errno)
1525 {
1526 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1527 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528 }
1529
1530 static size_t
kiconvstr_to_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1531 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1532 size_t *outbytesleft, int flag, int *errno)
1533 {
1534 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1535 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536 }
1537
/*
 * Conversions offered by this module.  Every entry names two code sets
 * and supplies an open routine, a conversion routine, the shared
 * close_ja routine and a string conversion routine.
 *
 * The Japanese code set comes first when the entry uses a *_to_*
 * routine and second when it uses a *_fr_* routine.
 * NOTE(review): the field order is presumably (to-code, from-code, open,
 * kiconv, close, kiconvstr) -- confirm against kiconv_ops_t.
 *
 * The eucjpms entries reuse the eucjp conversion routines and the cp932
 * entries reuse the sjis ones; they differ only in the open routine and
 * in the string conversion routine.
 */
static kiconv_ops_t kiconv_ja_ops_tbl[] = {
	{
		"eucjp", "utf-8", open_eucjp,
		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
	},
	{
		"utf-8", "eucjp", open_eucjp,
		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
	},
	{
		"eucjpms", "utf-8", open_eucjpms,
		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
	},
	{
		"utf-8", "eucjpms", open_eucjpms,
		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
	},
	{
		"sjis", "utf-8", open_sjis,
		kiconv_to_sjis, close_ja, kiconvstr_to_sjis
	},
	{
		"utf-8", "sjis", open_sjis,
		kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
	},
	{
		"cp932", "utf-8", open_cp932,
		kiconv_to_sjis, close_ja, kiconvstr_to_cp932
	},
	{
		"utf-8", "cp932", open_cp932,
		kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
	}
};
1572
/*
 * Alternative code set names accepted by this module.  The two arrays
 * have the same length; kiconv_ja_canonicals[i] is presumably the
 * canonical name that kiconv_ja_aliases[i] stands for.
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};

/* Element counts of the ops table and of the alias table. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1580
/*
 * Description of this module: its name, the ops table with its entry
 * count, and the alias and canonical name arrays with the alias count.
 * It is referenced from modlkiconv_ja.
 */
static kiconv_module_info_t kiconv_ja_info = {
	"kiconv_ja",		/* module name */
	KICONV_JA_MAX_JA_OPS,	/* number of conversion in kiconv_ja */
	kiconv_ja_ops_tbl,	/* kiconv_ja ops table */
	KICONV_JA_MAX_JA_ALIAS,	/* number of alias in kiconv_ja */
	kiconv_ja_aliases,	/* kiconv_ja aliases */
	kiconv_ja_canonicals,	/* kiconv_ja canonicals */
	/*
	 * NOTE(review): the meaning of this trailing field is not visible
	 * here -- confirm against kiconv_module_info_t.
	 */
	0
};
1590
/*
 * Linkage element for this module: it ties mod_kiconvops and a
 * descriptive string to kiconv_ja_info, and is referenced from
 * modlinkage.
 */
static struct modlkiconv modlkiconv_ja = {
	&mod_kiconvops,
	"kiconv module for Japanese",
	&kiconv_ja_info
};
1596
/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove():
 * revision MODREV_1 followed by the single linkage element
 * modlkiconv_ja and a terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlkiconv_ja,
	NULL
};
1602
1603 int
_init(void)1604 _init(void)
1605 {
1606 int err;
1607
1608 err = mod_install(&modlinkage);
1609 if (err)
1610 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1611
1612 return (err);
1613 }
1614
1615 int
_info(struct modinfo * modinfop)1616 _info(struct modinfo *modinfop)
1617 {
1618 return (mod_info(&modlinkage, modinfop));
1619 }
1620
1621 int
_fini(void)1622 _fini(void)
1623 {
1624 int err;
1625
1626 /*
1627 * If this module is being used, then, we cannot remove the module.
1628 * The following checking will catch pretty much all usual cases.
1629 *
1630 * Any remaining will be catached by the kiconv_unregister_module()
1631 * during mod_remove() at below.
1632 */
1633 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1634 return (EBUSY);
1635
1636 err = mod_remove(&modlinkage);
1637 if (err)
1638 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1639
1640 return (err);
1641 }
1642