1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/debug.h>
32 #include <sys/kmem.h>
33 #include <sys/sunddi.h>
34 #include <sys/byteorder.h>
35 #include <sys/errno.h>
36 #include <sys/euc.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
39
40 #include <sys/kiconv_ja.h>
41 #include <sys/kiconv_ja_jis_to_unicode.h>
42 #include <sys/kiconv_ja_unicode_to_jis.h>
43
/*
 * The following vector gives the total number of bytes in a UTF-8
 * character, or a U8_ILLEGAL_CHAR / U8_OUT_OF_RANGE_CHAR marker for a byte
 * that cannot start a character. The index is the first byte of the
 * character. This is defined in u8_textprep.c.
 */
49 extern const int8_t u8_number_of_bytes[];
50
/*
 * The following is a vector of bit-masks used to extract the significant
 * bits from the first byte of a UTF-8 character. The index is the number
 * of bytes that follow the first byte. This is defined in uconv.c.
 */
56 extern const uchar_t u8_masks_tbl[];
57
58 /*
59 * The following two vectors are to provide valid minimum and
60 * maximum values for the 2'nd byte of a multibyte UTF-8 character for
61 * better illegal sequence checking. The index value must be the value of
62 * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
63 */
64 extern const uint8_t u8_valid_min_2nd_byte[];
65 extern const uint8_t u8_valid_max_2nd_byte[];
66
67 static kiconv_ja_euc16_t
kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
69 {
70 const kiconv_ja_euc16_t *p;
71
72 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
73 return (p[ucs2 & 0xff]);
74
75 return (KICONV_JA_NODEST);
76 }
77
78 static size_t
utf8_ucs(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno)79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
80 {
81 uint_t l; /* to be copied to *p on successful return */
82 uchar_t ic; /* current byte */
83 uchar_t ic1; /* 1st byte */
84 uchar_t *ip = *pip; /* next byte to read */
85 size_t ileft = *pileft; /* number of bytes available */
86 size_t rv = 0; /* return value of this function */
87 int remaining_bytes;
88 int u8_size;
89
90 KICONV_JA_NGET(ic1); /* read 1st byte */
91
92 if (ic1 < 0x80) {
93 /* successfully converted */
94 *p = (uint_t)ic1;
95 goto ret;
96 }
97
98 u8_size = u8_number_of_bytes[ic1];
99 if (u8_size == U8_ILLEGAL_CHAR) {
100 KICONV_JA_RETERROR(EILSEQ)
101 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
102 KICONV_JA_RETERROR(ERANGE)
103 }
104
105 remaining_bytes = u8_size - 1;
106 if (remaining_bytes != 0) {
107 l = ic1 & u8_masks_tbl[remaining_bytes];
108
109 for (; remaining_bytes > 0; remaining_bytes--) {
110 KICONV_JA_NGET(ic);
111 if (ic1 != 0U) {
112 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
113 (ic > u8_valid_max_2nd_byte[ic1])) {
114 KICONV_JA_RETERROR(EILSEQ)
115 }
116 ic1 = 0U; /* 2nd byte check done */
117 } else {
118 if ((ic < 0x80) || (ic > 0xbf)) {
119 KICONV_JA_RETERROR(EILSEQ)
120 }
121 }
122 l = (l << 6) | (ic & 0x3f);
123 }
124
125 /* successfully converted */
126 *p = l;
127 } else {
128 KICONV_JA_RETERROR(EILSEQ)
129 }
130
131 ret:
132 if (rv == 0) {
133 /*
134 * Update rv, *pip, and *pileft on successfule return.
135 */
136 rv = *pileft - ileft;
137 *pip = ip;
138 *pileft = ileft;
139 }
140
141 return (rv);
142 }
143
144 static size_t
utf8_ucs_replace(uint_t * p,uchar_t ** pip,size_t * pileft,size_t * repnum)145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
146 {
147 uint_t l; /* to be copied to *p on successful return */
148 uchar_t ic; /* current byte */
149 uchar_t ic1; /* 1st byte */
150 uchar_t *ip = *pip; /* next byte to read */
151 size_t ileft = *pileft; /* number of bytes available */
152 size_t rv = 0; /* return value of this function */
153 int remaining_bytes;
154 int u8_size;
155
156 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */
157
158 if (ic1 < 0x80) {
159 /* successfully converted */
160 l = (uint_t)ic1;
161 goto ret;
162 }
163
164 u8_size = u8_number_of_bytes[ic1];
165 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
166 l = KICONV_JA_DEF_SINGLE;
167 (*repnum)++;
168 goto ret;
169 }
170
171 remaining_bytes = u8_size - 1;
172
173 if (remaining_bytes != 0) {
174 l = ic1 & u8_masks_tbl[remaining_bytes];
175
176 for (; remaining_bytes > 0; remaining_bytes--) {
177 KICONV_JA_NGET_REP_TO_MB(ic);
178 if (ic1 != 0U) {
179 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
180 (ic > u8_valid_max_2nd_byte[ic1])) {
181 l = KICONV_JA_DEF_SINGLE;
182 (*repnum)++;
183 ileft -= (remaining_bytes - 1);
184 ip += (remaining_bytes - 1);
185 break;
186 }
187 ic1 = 0U; /* 2nd byte check done */
188 } else {
189 if ((ic < 0x80) || (ic > 0xbf)) {
190 l = KICONV_JA_DEF_SINGLE;
191 (*repnum)++;
192 ileft -= (remaining_bytes - 1);
193 ip += (remaining_bytes - 1);
194 break;
195 }
196 }
197 l = (l << 6) | (ic & 0x3f);
198 }
199 } else {
200 l = KICONV_JA_DEF_SINGLE;
201 (*repnum)++;
202 }
203
204 ret:
205 /* successfully converted */
206 *p = l;
207 rv = *pileft - ileft;
208
209 *pip = ip;
210 *pileft = ileft;
211
212 return (rv);
213 }
214
215 static size_t /* return #bytes read, or -1 */
read_unicode(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno,int flag,size_t * rv)216 read_unicode(
217 uint_t *p, /* point variable to store UTF-32 */
218 uchar_t **pip, /* point pointer to input buf */
219 size_t *pileft, /* point #bytes left in input buf */
220 int *errno, /* point variable to errno */
221 int flag, /* kiconvstr flag */
222 size_t *rv) /* point return valuse */
223 {
224 if (flag & KICONV_REPLACE_INVALID)
225 return (utf8_ucs_replace(p, pip, pileft, rv));
226 else
227 return (utf8_ucs(p, pip, pileft, errno));
228 }
229
230 static size_t
write_unicode(uint_t u32,char ** pop,size_t * poleft,int * errno)231 write_unicode(
232 uint_t u32, /* UTF-32 to write */
233 char **pop, /* point pointer to output buf */
234 size_t *poleft, /* point #bytes left in output buf */
235 int *errno) /* point variable to errno */
236 {
237 char *op = *pop;
238 size_t oleft = *poleft;
239 size_t rv = 0; /* return value */
240
241 if (u32 <= 0x7f) {
242 KICONV_JA_NPUT((uchar_t)(u32));
243 rv = 1;
244 } else if (u32 <= 0x7ff) {
245 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
246 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
247 rv = 2;
248 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
249 KICONV_JA_RETERROR(EILSEQ)
250 } else if (u32 <= 0xffff) {
251 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
252 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
253 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
254 rv = 3;
255 } else if (u32 <= 0x10ffff) {
256 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
257 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
258 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
259 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
260 rv = 4;
261 } else {
262 KICONV_JA_RETERROR(EILSEQ)
263 }
264
265 ret:
266 if (rv != (size_t)-1) {
267 /* update *pop and *poleft only on successful return */
268 *pop = op;
269 *poleft = oleft;
270 }
271
272 return (rv);
273 }
274
275 static void *
_kiconv_ja_open_unicode(uint8_t id)276 _kiconv_ja_open_unicode(uint8_t id)
277 {
278 kiconv_state_t kcd;
279
280 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
281 KM_SLEEP);
282 kcd->id = id;
283 kcd->bom_processed = 0;
284 return ((void *)kcd);
285 }
286
287 static void *
open_eucjp(void)288 open_eucjp(void)
289 {
290 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
291 }
292
293 static void *
open_eucjpms(void)294 open_eucjpms(void)
295 {
296 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
297 }
298
299 static void *
open_sjis(void)300 open_sjis(void)
301 {
302 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
303 }
304
305 static void *
open_cp932(void)306 open_cp932(void)
307 {
308 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
309 }
310
311 int
close_ja(void * kcd)312 close_ja(void *kcd)
313 {
314 if (! kcd || kcd == (void *)-1)
315 return (EBADF);
316
317 kmem_free(kcd, sizeof (kiconv_state_data_t));
318
319 return (0);
320 }
321
/*
 * Buffer-to-buffer conversion from the EUC-JP family to Unicode.
 *
 * kcd		conversion state; only its id field is read, and it is
 *		handed to the KICONV_JA_CNV_* macros to select the variant
 * inbuf, inbytesleft	in/out cursor over the input bytes
 * outbuf, outbytesleft	in/out cursor over the output bytes
 * errno	error code destination used by the KICONV_JA_* macros
 *
 * The caller's cursors are advanced only after a whole character has been
 * converted, so on an error exit they still refer to the start of the
 * character that failed.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL. Otherwise returns
 * rv, which counts the characters whose table lookup produced
 * KICONV_JA_REPLACE, or the value stored in rv by KICONV_JA_RETERROR.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU and KICONV_JA_RETERROR are
 * defined elsewhere; they presumably use ip, ileft, op, oleft, rv, errno
 * and jump to the label ret -- confirm against the header.
 */
static size_t
_do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t		u32;		/* UTF-32 */
	uint_t		index;		/* index for table lookup */
	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
	size_t		rv = 0;		/* return value of this function */

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; write back after each whole character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1);		/* get 1st byte */

		if (KICONV_JA_ISASC(ic1)) {	/* ASCII; 1 byte */
			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(u32);
		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
				ic1 &= KICONV_JA_CMASK;
				ic2 &= KICONV_JA_CMASK;
				/* variant-specific mapping first ... */
				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
				if (u32 == KICONV_JA_NODEST) {
					/* ... then the 94x94 JIS X 0208 table */
					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
					u32 = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (u32 == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
				index = (ic2 - 0xa1);
				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
				KICONV_JA_NGET(ic3);
				if (KICONV_JA_ISCS3(ic3)) {
					/* 3rd byte check passed */
					ic2 &= KICONV_JA_CMASK;
					ic3 &= KICONV_JA_CMASK;
					/* variant-specific mapping first ... */
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
					    ic2, ic3);
					if (u32 == KICONV_JA_NODEST) {
						/* ... then the JIS X 0212 table */
						index = ((ic2 - 0x21) * 94 +
						    (ic3 - 0x21));
						u32 = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (u32 == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(u32);
				} else { /* 3rd byte check failed */
					KICONV_JA_RETERROR(EILSEQ)
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
			/* C1 control; 1 byte */
			u32 = ic1;
			KICONV_JA_PUTU(u32);
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
423
/*
 * Buffer-to-buffer conversion from UTF-8 to the EUC-JP family.
 *
 * kcd		conversion state; its id field is read here and passed to
 *		KICONV_JA_CNV_U2_TO_EUCJPMS
 * inbuf, inbytesleft	in/out cursor over the input bytes
 * outbuf, outbytesleft	in/out cursor over the output bytes
 * errno	error code destination used by the KICONV_JA_* macros
 *
 * Characters above 0xffff and characters with no mapping are written as
 * KICONV_JA_DEF_SINGLE, and each such substitution increments the return
 * value. The caller's cursors are advanced after every character.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): KICONV_JA_CHECK_UTF8_BOM, KICONV_JA_GETU and
 * KICONV_JA_NPUT are defined elsewhere. read_len is not referenced in the
 * visible code and is presumably used inside KICONV_JA_GETU; the label
 * ret is presumably the target of the macros' error paths -- confirm
 * against the header.
 */
static size_t
_do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t		ic;
	size_t		rv = 0;
	uint_t		ucs4;
	ushort_t	euc16;

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		read_len;

	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; write back after each whole character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* variant-specific mapping first, then the generic table */
		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/*
		 * The top bit of each byte of euc16 selects the code set
		 * and therefore how the value is serialized.
		 */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8080:	/* CS1 */
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			ic = (uchar_t)(euc16 & 0xff);
			KICONV_JA_NPUT(ic);
			break;
		case 0x0080:	/* CS2 */
			KICONV_JA_NPUT(SS2);
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			KICONV_JA_NPUT(SS3);
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			/* the low byte is stored without its top bit */
			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
			break;
		}
next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
510
/*
 * String conversion from the EUC-JP family to Unicode.
 *
 * inbuf, inbytesleft	input string and in/out count of bytes left in it
 * outbuf, outbytesleft	output buffer and in/out count of bytes left in it
 * flag		KICONV_IGNORE_NULL and KICONV_REPLACE_INVALID are examined
 * errno	error code destination used by the KICONV_JA_* macros
 * id		table id handed to the KICONV_JA_CNV_* macros
 *
 * Unless KICONV_IGNORE_NULL is set, conversion stops at the first NUL
 * byte. With KICONV_REPLACE_INVALID set, an invalid byte sequence is
 * converted to KICONV_JA_REPLACE and counted; otherwise it ends the
 * conversion through KICONV_JA_RETERROR(EILSEQ).
 *
 * Returns 0 when inbuf is NULL or starts with NUL. Otherwise returns rv,
 * which counts the KICONV_JA_REPLACE characters produced, or the value
 * stored in rv by KICONV_JA_RETERROR.
 *
 * NOTE(review): stopping on a NUL byte returns 0, not the count
 * accumulated so far in rv.
 * NOTE(review): the label next is not the target of any visible goto; it
 * is presumably used by KICONV_JA_NGET_REP_FR_MB, which is defined
 * elsewhere -- confirm against the header.
 */
static size_t
_do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
    size_t *outbytesleft, int flag, int *errno, uint8_t id)
{
	uint_t		u32;		/* UTF-32 */
	uint_t		index;		/* index for table lookup */
	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
	size_t		rv = 0;		/* return value of this function */

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;

	boolean_t	do_not_ignore_null;

	if ((inbuf == NULL) || (*inbuf == '\0')) {
		return (0);
	}

	ip = (uchar_t *)inbuf;
	ileft = *inbytesleft;
	op = outbuf;
	oleft = *outbytesleft;

	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

	while (ileft != 0) {
		KICONV_JA_NGET(ic1);		/* get 1st byte */

		if (KICONV_JA_ISASC(ic1)) {	/* ASCII; 1 byte */
			if (ic1 == '\0' && do_not_ignore_null) {
				/* NUL ends the string; counters stay as is */
				return (0);
			}
			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(u32);
		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
			if (flag & KICONV_REPLACE_INVALID) {
				KICONV_JA_NGET_REP_FR_MB(ic2);
			} else {
				KICONV_JA_NGET(ic2);
			}
			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
				ic1 &= KICONV_JA_CMASK;
				ic2 &= KICONV_JA_CMASK;
				/* variant-specific mapping first ... */
				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
				if (u32 == KICONV_JA_NODEST) {
					/* ... then the 94x94 JIS X 0208 table */
					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
					u32 = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (u32 == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				if (flag & KICONV_REPLACE_INVALID) {
					KICONV_JA_PUTU(KICONV_JA_REPLACE);
					rv++;
				} else {
					KICONV_JA_RETERROR(EILSEQ)
				}
			}
		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
			if (flag & KICONV_REPLACE_INVALID) {
				KICONV_JA_NGET_REP_FR_MB(ic2);
			} else {
				KICONV_JA_NGET(ic2);
			}
			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
				index = (ic2 - 0xa1);
				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				if (flag & KICONV_REPLACE_INVALID) {
					KICONV_JA_PUTU(KICONV_JA_REPLACE);
					rv++;
				} else {
					KICONV_JA_RETERROR(EILSEQ)
				}
			}
		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
			if (flag & KICONV_REPLACE_INVALID) {
				KICONV_JA_NGET_REP_FR_MB(ic2);
			} else {
				KICONV_JA_NGET(ic2);
			}
			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
				if (flag & KICONV_REPLACE_INVALID) {
					KICONV_JA_NGET_REP_FR_MB(ic3);
				} else {
					KICONV_JA_NGET(ic3);
				}
				if (KICONV_JA_ISCS3(ic3)) {
					/* 3rd byte check passed */
					ic2 &= KICONV_JA_CMASK;
					ic3 &= KICONV_JA_CMASK;
					/* variant-specific mapping first ... */
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
					    ic2, ic3);
					if (u32 == KICONV_JA_NODEST) {
						/* ... then the JIS X 0212 table */
						index = ((ic2 - 0x21) * 94 +
						    (ic3 - 0x21));
						u32 = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (u32 == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(u32);
				} else { /* 3rd byte check failed */
					if (flag & KICONV_REPLACE_INVALID) {
						KICONV_JA_PUTU(
						    KICONV_JA_REPLACE);
						rv++;
					} else {
						KICONV_JA_RETERROR(EILSEQ)
					}
				}
			} else { /* 2nd byte check failed */
				if (flag & KICONV_REPLACE_INVALID) {
					KICONV_JA_PUTU(KICONV_JA_REPLACE);
					rv++;
				} else {
					KICONV_JA_RETERROR(EILSEQ)
				}
			}
		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
			/* C1 control; 1 byte */
			u32 = ic1;
			KICONV_JA_PUTU(u32);
		} else { /* 1st byte check failed */
			if (flag & KICONV_REPLACE_INVALID) {
				KICONV_JA_PUTU(KICONV_JA_REPLACE);
				rv++;
			} else {
				KICONV_JA_RETERROR(EILSEQ)
			}
		}

next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbytesleft = ileft;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
659
/*
 * String conversion from UTF-8 to the EUC-JP family.
 *
 * inbuf, inbytesleft	input string and in/out count of bytes left in it
 * outbuf, outbytesleft	output buffer and in/out count of bytes left in it
 * flag		KICONV_IGNORE_NULL is examined here; the whole flag is also
 *		passed to KICONV_JA_GETU
 * errno	error code destination used by the KICONV_JA_* macros
 * id		table id passed to KICONV_JA_CNV_U2_TO_EUCJPMS
 *
 * Unless KICONV_IGNORE_NULL is set, conversion stops at the first decoded
 * character whose value is 0. Characters above 0xffff and characters with
 * no mapping are written as KICONV_JA_DEF_SINGLE and counted in rv.
 *
 * Returns 0 when inbuf is NULL or starts with NUL; otherwise rv.
 *
 * NOTE(review): stopping on a NUL character returns 0, not the count
 * accumulated so far in rv.
 * NOTE(review): read_len is not referenced in the visible code and is
 * presumably used inside KICONV_JA_GETU, which is defined elsewhere --
 * confirm against the header.
 */
static size_t
_do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
    size_t *outbytesleft, int flag, int *errno, uint8_t id)
{
	uchar_t		ic;
	size_t		rv = 0;
	uint_t		ucs4;
	ushort_t	euc16;

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		read_len;

	boolean_t	do_not_ignore_null;

	if ((inbuf == NULL) || (*inbuf == '\0')) {
		return (0);
	}

	ip = (uchar_t *)inbuf;
	ileft = *inbytesleft;
	op = outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);

	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, flag);

		if (ucs4 == 0x0 && do_not_ignore_null) {
			/* NUL ends the string; counters stay as is */
			return (0);
		}

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* variant-specific mapping first, then the generic table */
		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/*
		 * The top bit of each byte of euc16 selects the code set
		 * and therefore how the value is serialized.
		 */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8080:	/* CS1 */
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			ic = (uchar_t)(euc16 & 0xff);
			KICONV_JA_NPUT(ic);
			break;
		case 0x0080:	/* CS2 */
			KICONV_JA_NPUT(SS2);
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			KICONV_JA_NPUT(SS3);
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			/* the low byte is stored without its top bit */
			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
			break;
		}
next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbytesleft = ileft;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
750
751 static size_t
kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
753 char **outbuf, size_t *outbytesleft, int *errno)
754 {
755 if (! kcd || kcd == (void *)-1) {
756 *errno = EBADF;
757 return ((size_t)-1);
758 }
759
760 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
761 outbuf, outbytesleft, errno));
762 }
763
764 static size_t
kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
766 char **outbuf, size_t *outbytesleft, int *errno)
767 {
768 if (! kcd || kcd == (void *)-1) {
769 *errno = EBADF;
770 return ((size_t)-1);
771 }
772
773 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
774 outbuf, outbytesleft, errno));
775 }
776
777 static size_t
kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
779 size_t *outbytesleft, int flag, int *errno)
780 {
781 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
782 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
783 }
784
785 static size_t
kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
787 size_t *outbytesleft, int flag, int *errno)
788 {
789 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
790 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
791 }
792
793 static size_t
kiconvstr_fr_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
795 size_t *outbytesleft, int flag, int *errno)
796 {
797 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
798 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
799 }
800
801 static size_t
kiconvstr_to_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
803 size_t *outbytesleft, int flag, int *errno)
804 {
805 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
806 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
807 }
808
/*
 * Buffer-to-buffer conversion from the Shift_JIS family to Unicode.
 *
 * kcd		conversion state; only its id field is read, and it is
 *		handed to the KICONV_JA_CNV_* macros to select the variant
 * inbuf, inbytesleft	in/out cursor over the input bytes
 * outbuf, outbytesleft	in/out cursor over the output bytes
 * errno	error code destination used by the KICONV_JA_* macros
 *
 * The first byte is classified with the KICONV_JA_ISSJ* predicates and
 * the character is converted through the matching table. The caller's
 * cursors are advanced only after a whole character has been converted.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL. Otherwise returns
 * rv, which counts the characters whose table lookup produced
 * KICONV_JA_REPLACE, or the value stored in rv by KICONV_JA_RETERROR.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU, KICONV_JA_RETERROR and
 * KICONV_JA_REMAP_NEC are defined elsewhere; they presumably use ip,
 * ileft, op, oleft, rv, errno and jump to the label ret -- confirm
 * against the header.
 */
static size_t
_do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t		uni;		/* UTF-32 */
	uint_t		index;		/* index for table lookup */
	uchar_t		ic1, ic2;	/* 1st and 2nd bytes of a char */
	size_t		rv = 0;		/* return value of this function */

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; write back after each whole character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1);			/* get 1st byte */

		if (KICONV_JA_ISASC((int)ic1)) {	/* ASCII; 1 byte */
			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/* translate the byte pair to JIS row/column */
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				ic2 = kiconv_ja_sjtojis2[ic2];
				/* variant-specific mapping first ... */
				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
				if (uni == KICONV_JA_NODEST) {
					/* ... then the 94x94 JIS X 0208 table */
					index = ((ic1 - 0x21) * 94)
					    + (ic2 - 0x21);
					uni = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
				/* NOTREACHED */
			}
		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				/* this area is looked up in the 0212 table */
				index = ((ic1 - 0x21) * 94)
				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
				uni = kiconv_ja_jisx0212_to_ucs2[index];
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
			/*
			 * We need a special treatment for each codes.
			 * By adding some offset number for them, we
			 * can process them as the same way of that of
			 * extended IBM chars.
			 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				ushort_t dest, upper, lower;
				dest = (ic1 << 8) + ic2;
				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
					/* 0xffff after the remap is an error */
					KICONV_JA_REMAP_NEC(dest);
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
				}
				/*
				 * XXX: 0xfa54 and 0xfa5b must be mapped
				 *	to JIS0208 area. Therefore we
				 *	have to do special treatment.
				 */
				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
					if (dest == 0xfa54) {
						upper = 0x22;
						lower = 0x4c;
					} else {
						upper = 0x22;
						lower = 0x68;
					}
					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0208_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				} else {
					/*
					 * Turn the code into an index into
					 * kiconv_ja_sjtoibmext[]: offset from
					 * 0xfa40, minus 0x40 for every lead
					 * byte step above 0xfa.
					 */
					dest = dest - 0xfa40 -
					    (((dest>>8) - 0xfa) * 0x40);
					dest = kiconv_ja_sjtoibmext[dest];
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
					upper = (dest >> 8) & KICONV_JA_CMASK;
					lower = dest & KICONV_JA_CMASK;
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
		/*
		 * Based on the draft convention of OSF-JVC CDEWG,
		 * characters in this area will be mapped to
		 * "CHIKAN-MOJI." (convertible character)
		 * We use U+FFFD in this case.
		 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				uni = 0xfffd;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
976
977 /*
978 * _kiconv_ja_lookuptbl()
979 * Return the index number if its index-ed number
980 * is the same as dest value.
981 */
982 static ushort_t
_kiconv_ja_lookuptbl(ushort_t dest)983 _kiconv_ja_lookuptbl(ushort_t dest)
984 {
985 ushort_t tmp;
986 int i;
987 int sz = (sizeof (kiconv_ja_sjtoibmext) /
988 sizeof (kiconv_ja_sjtoibmext[0]));
989
990 for (i = 0; i < sz; i++) {
991 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
992 if (tmp == dest)
993 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
994 }
995 return (0x3f);
996 }
997
/*
 * Buffer-to-buffer conversion from UTF-8 to the Shift_JIS family.
 *
 * kcd		conversion state; its id field is read here and passed to
 *		KICONV_JA_CNV_U2_TO_EUCJPMS
 * inbuf, inbytesleft	in/out cursor over the input bytes
 * outbuf, outbytesleft	in/out cursor over the output bytes
 * errno	error code destination used by the KICONV_JA_* macros
 *
 * Each character is first mapped to an EUC16 value and then re-encoded
 * through the kiconv_ja_jis*tosj* tables. Characters above 0xffff,
 * characters with no mapping and CS0 values matched by KICONV_JA_ISC1CTRL
 * are written as KICONV_JA_DEF_SINGLE and counted in rv.
 *
 * Returns 0 immediately when inbuf or *inbuf is NULL; otherwise rv.
 *
 * NOTE(review): read_len is not referenced in the visible code and is
 * presumably used inside KICONV_JA_GETU, which is defined elsewhere --
 * confirm against the header.
 * NOTE(review): _kiconv_ja_lookuptbl() as written in this file returns
 * 0x3f, never 0xffff, when nothing matches, so the dest == 0xffff branch
 * below is not taken; the miss is emitted as the single byte 0x3f by the
 * dest <= 0xff path and rv is not incremented for it.
 */
static size_t
_do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t		ic;
	size_t		rv = 0;
	uint_t		ucs4;
	ushort_t	euc16;
	ushort_t	dest;

	uchar_t		*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		read_len;

	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	/* work on local copies; write back after each whole character */
	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* variant-specific mapping first, then the generic table */
		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* the top bit of each byte of euc16 selects the code set */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				rv++;
			} else {
				ic = (uchar_t)euc16;
				KICONV_JA_NPUT(ic);
			}
			break;
		case 0x8080:	/* CS1 */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
			/*
			 * for even number row (Ku), add 0x80 to
			 * look latter half of kiconv_ja_jistosj2[] array
			 */
			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
			    + (((ic % 2) == 0) ? 0x80 : 0x00));
			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			break;
		case 0x0080:	/* CS2 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			if (euc16 == 0xa271) {
				/* NUMERO SIGN */
				KICONV_JA_NPUT(0x87);
				KICONV_JA_NPUT(0x82);
			} else if (ic < 0x75) { /* check if IBM VDC */
				/* reverse lookup in kiconv_ja_sjtoibmext[] */
				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
				if (dest == 0xffff) {
					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				} else {
					/* avoid putting NUL ('\0') */
					if (dest > 0xff) {
						KICONV_JA_NPUT(
						    (dest >> 8) & 0xff);
						KICONV_JA_NPUT(dest & 0xff);
					} else {
						KICONV_JA_NPUT(dest & 0xff);
					}
				}
			} else {
				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
				/*
				 * for even number row (Ku), add 0x80 to
				 * look latter half of kiconv_ja_jistosj2[]
				 */
				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
				    + (((ic % 2) == 0) ? 0x80 : 0x00));
				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			}
			break;
		}

next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
1119
1120 static size_t
_do_kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1122 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1123 {
1124 uint_t uni; /* UTF-32 */
1125 uint_t index; /* index for table lookup */
1126 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
1127 size_t rv = 0; /* return value of this function */
1128
1129 uchar_t *ip;
1130 size_t ileft;
1131 char *op;
1132 size_t oleft;
1133
1134 boolean_t do_not_ignore_null;
1135
1136 if ((inbuf == NULL) || (*inbuf == '\0')) {
1137 return (0);
1138 }
1139
1140 ip = (uchar_t *)inbuf;
1141 ileft = *inbytesleft;
1142 op = outbuf;
1143 oleft = *outbytesleft;
1144
1145 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1146
1147 while (ileft != 0) {
1148 KICONV_JA_NGET(ic1); /* get 1st byte */
1149
1150 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
1151 if (ic1 == '\0' && do_not_ignore_null) {
1152 return (0);
1153 }
1154 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1155 KICONV_JA_PUTU(uni);
1156 } else if (KICONV_JA_ISSJKANA(ic1)) {
1157 /* JIS X 0201 Kana; 1 byte */
1158 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1159 KICONV_JA_PUTU(uni);
1160 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1161 /* JIS X 0208 or UDC; 2 bytes */
1162 if (flag & KICONV_REPLACE_INVALID) {
1163 KICONV_JA_NGET_REP_FR_MB(ic2);
1164 } else {
1165 KICONV_JA_NGET(ic2);
1166 }
1167 if (KICONV_JA_ISSJKANJI2(ic2)) {
1168 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1169 if (ic2 >= 0x9f) {
1170 ic1++;
1171 }
1172 ic2 = kiconv_ja_sjtojis2[ic2];
1173 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1174 if (uni == KICONV_JA_NODEST) {
1175 index = ((ic1 - 0x21) * 94)
1176 + (ic2 - 0x21);
1177 uni = kiconv_ja_jisx0208_to_ucs2[index];
1178 }
1179 if (uni == KICONV_JA_REPLACE)
1180 rv++;
1181 KICONV_JA_PUTU(uni);
1182 } else { /* 2nd byte check failed */
1183 if (flag & KICONV_REPLACE_INVALID) {
1184 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1185 rv++;
1186 } else {
1187 KICONV_JA_RETERROR(EILSEQ)
1188 }
1189 /* NOTREACHED */
1190 }
1191 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1192 if (flag & KICONV_REPLACE_INVALID) {
1193 KICONV_JA_NGET_REP_FR_MB(ic2);
1194 } else {
1195 KICONV_JA_NGET(ic2);
1196 }
1197 if (KICONV_JA_ISSJKANJI2(ic2)) {
1198 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1199 if (ic2 >= 0x9f) {
1200 ic1++;
1201 }
1202 index = ((ic1 - 0x21) * 94)
1203 + (kiconv_ja_sjtojis2[ic2] - 0x21);
1204 uni = kiconv_ja_jisx0212_to_ucs2[index];
1205 if (uni == KICONV_JA_REPLACE)
1206 rv++;
1207 KICONV_JA_PUTU(uni);
1208 } else { /* 2nd byte check failed */
1209 if (flag & KICONV_REPLACE_INVALID) {
1210 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1211 rv++;
1212 } else {
1213 KICONV_JA_RETERROR(EILSEQ)
1214 }
1215 }
1216 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1217 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1218 /*
1219 * We need a special treatment for each codes.
1220 * By adding some offset number for them, we
1221 * can process them as the same way of that of
1222 * extended IBM chars.
1223 */
1224 if (flag & KICONV_REPLACE_INVALID) {
1225 KICONV_JA_NGET_REP_FR_MB(ic2);
1226 } else {
1227 KICONV_JA_NGET(ic2);
1228 }
1229 if (KICONV_JA_ISSJKANJI2(ic2)) {
1230 ushort_t dest, upper, lower;
1231 dest = (ic1 << 8) + ic2;
1232 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1233 KICONV_JA_REMAP_NEC(dest);
1234 if (dest == 0xffff) {
1235 if (flag &
1236 KICONV_REPLACE_INVALID) {
1237 KICONV_JA_PUTU(
1238 KICONV_JA_REPLACE);
1239 rv++;
1240 } else {
1241 KICONV_JA_RETERROR(
1242 EILSEQ)
1243 }
1244 }
1245 }
1246 /*
1247 * XXX: 0xfa54 and 0xfa5b must be mapped
1248 * to JIS0208 area. Therefore we
1249 * have to do special treatment.
1250 */
1251 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1252 if (dest == 0xfa54) {
1253 upper = 0x22;
1254 lower = 0x4c;
1255 } else {
1256 upper = 0x22;
1257 lower = 0x68;
1258 }
1259 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1260 upper, lower);
1261 if (uni == KICONV_JA_NODEST) {
1262 index = (uint_t)((upper - 0x21)
1263 * 94 + (lower - 0x21));
1264 uni = kiconv_ja_jisx0208_to_ucs2
1265 [index];
1266 }
1267 if (uni == KICONV_JA_REPLACE)
1268 rv++;
1269 KICONV_JA_PUTU(uni);
1270 } else {
1271 dest = dest - 0xfa40 -
1272 (((dest>>8) - 0xfa) * 0x40);
1273 dest = kiconv_ja_sjtoibmext[dest];
1274 if (dest == 0xffff) {
1275 if (flag &
1276 KICONV_REPLACE_INVALID) {
1277 KICONV_JA_PUTU(
1278 KICONV_JA_REPLACE);
1279 rv++;
1280 } else {
1281 KICONV_JA_RETERROR(
1282 EILSEQ)
1283 }
1284 }
1285 upper = (dest >> 8) & KICONV_JA_CMASK;
1286 lower = dest & KICONV_JA_CMASK;
1287 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1288 upper, lower);
1289 if (uni == KICONV_JA_NODEST) {
1290 index = (uint_t)((upper - 0x21)
1291 * 94 + (lower - 0x21));
1292 uni = kiconv_ja_jisx0212_to_ucs2
1293 [index];
1294 }
1295 if (uni == KICONV_JA_REPLACE)
1296 rv++;
1297 KICONV_JA_PUTU(uni);
1298 }
1299 } else { /* 2nd byte check failed */
1300 if (flag & KICONV_REPLACE_INVALID) {
1301 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1302 rv++;
1303 } else {
1304 KICONV_JA_RETERROR(EILSEQ)
1305 }
1306 }
1307 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1308 /*
1309 * Based on the draft convention of OSF-JVC CDEWG,
1310 * characters in this area will be mapped to
1311 * "CHIKAN-MOJI." (convertible character)
1312 * We use U+FFFD in this case.
1313 */
1314 if (flag & KICONV_REPLACE_INVALID) {
1315 KICONV_JA_NGET_REP_FR_MB(ic2);
1316 } else {
1317 KICONV_JA_NGET(ic2);
1318 }
1319 if (KICONV_JA_ISSJKANJI2(ic2)) {
1320 uni = 0xfffd;
1321 KICONV_JA_PUTU(uni);
1322 } else { /* 2nd byte check failed */
1323 if (flag & KICONV_REPLACE_INVALID) {
1324 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1325 rv++;
1326 } else {
1327 KICONV_JA_RETERROR(EILSEQ)
1328 }
1329 }
1330 } else { /* 1st byte check failed */
1331 if (flag & KICONV_REPLACE_INVALID) {
1332 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1333 rv++;
1334 } else {
1335 KICONV_JA_RETERROR(EILSEQ)
1336 }
1337 }
1338
1339 next:
1340 /*
1341 * One character successfully converted so update
1342 * values outside of this function's stack.
1343 */
1344 *inbytesleft = ileft;
1345 *outbytesleft = oleft;
1346 }
1347
1348 ret:
1349 return (rv);
1350 }
1351
1352 static size_t
_do_kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1354 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1355 {
1356 uchar_t ic;
1357 size_t rv = 0;
1358 uint_t ucs4;
1359 ushort_t euc16;
1360 ushort_t dest;
1361
1362 uchar_t *ip;
1363 size_t ileft;
1364 char *op;
1365 size_t oleft;
1366 size_t read_len;
1367
1368 boolean_t do_not_ignore_null;
1369
1370 if ((inbuf == NULL) || (*inbuf == '\0')) {
1371 return (0);
1372 }
1373
1374 ip = (uchar_t *)inbuf;
1375 ileft = *inbytesleft;
1376 op = outbuf;
1377 oleft = *outbytesleft;
1378
1379 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1380
1381 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1382
1383 while (ileft != 0) {
1384 KICONV_JA_GETU(&ucs4, flag);
1385
1386 if (ucs4 == 0x0 && do_not_ignore_null) {
1387 return (0);
1388 }
1389
1390 if (ucs4 > 0xffff) {
1391 /* non-BMP */
1392 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1393 rv++;
1394 goto next;
1395 }
1396
1397 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1398 if (euc16 == KICONV_JA_NODEST) {
1399 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1400 }
1401 if (euc16 == KICONV_JA_NODEST) {
1402 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1403 rv++;
1404 goto next;
1405 }
1406
1407 switch (euc16 & 0x8080) {
1408 case 0x0000: /* CS0 */
1409 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1410 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1411 rv++;
1412 } else {
1413 ic = (uchar_t)euc16;
1414 KICONV_JA_NPUT(ic);
1415 }
1416 break;
1417 case 0x8080: /* CS1 */
1418 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1419 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1420 /*
1421 * for even number row (Ku), add 0x80 to
1422 * look latter half of kiconv_ja_jistosj2[] array
1423 */
1424 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1425 + (((ic % 2) == 0) ? 0x80 : 0x00));
1426 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1427 break;
1428 case 0x0080: /* CS2 */
1429 ic = (uchar_t)euc16;
1430 KICONV_JA_NPUT(ic);
1431 break;
1432 case 0x8000: /* CS3 */
1433 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1434 if (euc16 == 0xa271) {
1435 /* NUMERO SIGN */
1436 KICONV_JA_NPUT(0x87);
1437 KICONV_JA_NPUT(0x82);
1438 } else if (ic < 0x75) { /* check if IBM VDC */
1439 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1440 if (dest == 0xffff) {
1441 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1442 } else {
1443 /* avoid putting NUL ('\0') */
1444 if (dest > 0xff) {
1445 KICONV_JA_NPUT(
1446 (dest >> 8) & 0xff);
1447 KICONV_JA_NPUT(dest & 0xff);
1448 } else {
1449 KICONV_JA_NPUT(dest & 0xff);
1450 }
1451 }
1452 } else {
1453 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1454 /*
1455 * for even number row (Ku), add 0x80 to
1456 * look latter half of kiconv_ja_jistosj2[]
1457 */
1458 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1459 + (((ic % 2) == 0) ? 0x80 : 0x00));
1460 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1461 }
1462 break;
1463 }
1464
1465 next:
1466 /*
1467 * One character successfully converted so update
1468 * values outside of this function's stack.
1469 */
1470 *inbytesleft = ileft;
1471 *outbytesleft = oleft;
1472 }
1473
1474 ret:
1475 return (rv);
1476 }
1477
1478 static size_t
kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1480 char **outbuf, size_t *outbytesleft, int *errno)
1481 {
1482 if (! kcd || kcd == (void *)-1) {
1483 *errno = EBADF;
1484 return ((size_t)-1);
1485 }
1486
1487 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1488 outbuf, outbytesleft, errno));
1489 }
1490
1491 static size_t
kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1493 char **outbuf, size_t *outbytesleft, int *errno)
1494 {
1495 if (! kcd || kcd == (void *)-1) {
1496 *errno = EBADF;
1497 return ((size_t)-1);
1498 }
1499
1500 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1501 outbuf, outbytesleft, errno));
1502 }
1503
1504 static size_t
kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1506 size_t *outbytesleft, int flag, int *errno)
1507 {
1508 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1509 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1510 }
1511
1512 static size_t
kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1514 size_t *outbytesleft, int flag, int *errno)
1515 {
1516 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1517 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1518 }
1519
1520 static size_t
kiconvstr_fr_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1522 size_t *outbytesleft, int flag, int *errno)
1523 {
1524 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1525 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1526 }
1527
1528 static size_t
kiconvstr_to_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1530 size_t *outbytesleft, int flag, int *errno)
1531 {
1532 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1533 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1534 }
1535
1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1537 {
1538 "eucjp", "utf-8", open_eucjp,
1539 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1540 },
1541 {
1542 "utf-8", "eucjp", open_eucjp,
1543 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1544 },
1545 {
1546 "eucjpms", "utf-8", open_eucjpms,
1547 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1548 },
1549 {
1550 "utf-8", "eucjpms", open_eucjpms,
1551 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1552 },
1553 {
1554 "sjis", "utf-8", open_sjis,
1555 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1556 },
1557 {
1558 "utf-8", "sjis", open_sjis,
1559 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1560 },
1561 {
1562 "cp932", "utf-8", open_cp932,
1563 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1564 },
1565 {
1566 "utf-8", "cp932", open_cp932,
1567 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1568 }
1569 };
1570
/*
 * Alternative code names and the names they stand for.
 * NOTE(review): the two arrays appear to be parallel (entry i of
 * kiconv_ja_aliases[] resolves to entry i of kiconv_ja_canonicals[]) and
 * must therefore stay the same length -- confirm against the kiconv
 * framework.
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};

/* Number of entries in kiconv_ja_ops_tbl[] and kiconv_ja_aliases[]. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1578
1579 static kiconv_module_info_t kiconv_ja_info = {
1580 "kiconv_ja", /* module name */
1581 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */
1582 kiconv_ja_ops_tbl, /* kiconv_ja ops table */
1583 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1584 kiconv_ja_aliases, /* kiconv_ja aliases */
1585 kiconv_ja_canonicals, /* kiconv_ja canonicals */
1586 0
1587 };
1588
1589 static struct modlkiconv modlkiconv_ja = {
1590 &mod_kiconvops,
1591 "kiconv module for Japanese",
1592 &kiconv_ja_info
1593 };
1594
1595 static struct modlinkage modlinkage = {
1596 MODREV_1,
1597 (void *)&modlkiconv_ja,
1598 NULL
1599 };
1600
1601 int
_init(void)1602 _init(void)
1603 {
1604 int err;
1605
1606 err = mod_install(&modlinkage);
1607 if (err)
1608 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1609
1610 return (err);
1611 }
1612
1613 int
_info(struct modinfo * modinfop)1614 _info(struct modinfo *modinfop)
1615 {
1616 return (mod_info(&modlinkage, modinfop));
1617 }
1618
1619 int
_fini(void)1620 _fini(void)
1621 {
1622 int err;
1623
1624 /*
1625 * If this module is being used, then, we cannot remove the module.
1626 * The following checking will catch pretty much all usual cases.
1627 *
1628 * Any remaining will be catached by the kiconv_unregister_module()
1629 * during mod_remove() at below.
1630 */
1631 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1632 return (EBUSY);
1633
1634 err = mod_remove(&modlinkage);
1635 if (err)
1636 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1637
1638 return (err);
1639 }
1640