1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
33 #include <sys/kmem.h>
34 #include <sys/sunddi.h>
35 #include <sys/byteorder.h>
36 #include <sys/errno.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
39 #include <sys/u8_textprep.h>
40 #include <sys/kiconv_cck_common.h>
41 #include <sys/kiconv_sc.h>
42 #include <sys/kiconv_gb18030_utf8.h>
43 #include <sys/kiconv_gb2312_utf8.h>
44 #include <sys/kiconv_utf8_gb18030.h>
45 #include <sys/kiconv_utf8_gb2312.h>
46
/*
 * Forward declarations of the single-character conversion helpers that are
 * defined further down in this file.  Every helper returns an int8_t and
 * reports through its trailing size_t pointer; the comment above each
 * definition describes the return convention.
 */
static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
	uchar_t *obtail, size_t *ret_val);
static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
	size_t *ret_val, boolean_t isgbk4);
static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret);
static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret);
static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret);
57
/*
 * Magic identifiers used as conversion descriptors for the converters
 * that go from a Simplified Chinese encoding to UTF-8.  The open routines
 * below allocate nothing: the identifier itself, cast to a pointer, is
 * the descriptor.  Valid identifiers are 1 .. KICONV_SC_MAX_MAGIC_ID.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
#define	KICONV_SC_MAX_MAGIC_ID		(0x03)

/*
 * Open routine for the GB18030 to UTF-8 converter.
 * Returns the GB18030 magic identifier as an opaque descriptor.
 */
static void *
open_fr_gb18030(void)
{
	return ((void *)(uintptr_t)KICONV_SC_GB18030);
}

/*
 * Open routine for the GBK to UTF-8 converter.
 * Returns the GBK magic identifier as an opaque descriptor.
 */
static void *
open_fr_gbk(void)
{
	return ((void *)(uintptr_t)KICONV_SC_GBK);
}

/*
 * Open routine for the EUC-CN to UTF-8 converter.
 * Returns the EUC-CN magic identifier as an opaque descriptor.
 */
static void *
open_fr_euccn(void)
{
	return ((void *)(uintptr_t)KICONV_SC_EUCCN);
}

/*
 * Close routine shared by the three open routines above.
 *
 * s is the descriptor previously returned by one of them.
 * Returns 0 when s is one of the magic identifiers, EBADF otherwise.
 * A NULL descriptor is rejected as well: zero is never handed out by an
 * open routine, and the kiconv_fr_*() converters in this file also treat
 * a NULL descriptor as EBADF.
 */
static int
close_fr_sc(void *s)
{
	uintptr_t id = (uintptr_t)s;

	if (id == 0 || id > KICONV_SC_MAX_MAGIC_ID)
		return (EBADF);

	return (0);
}
89
90 /*
91 * Encoding convertor from UTF-8 to GB18030.
92 */
93 size_t
kiconv_to_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)94 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
95 char **outbuf, size_t *outbytesleft, int *errno)
96 {
97
98 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
99 outbytesleft, errno, utf8_to_gb18030);
100 }
101
102 /*
103 * String based encoding convertor from UTF-8 to GB18030.
104 */
105 size_t
kiconvstr_to_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)106 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
107 size_t *outlen, int flag, int *errno)
108 {
109 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
110 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
111 }
112
113 /*
114 * Encoding convertor from GB18030 to UTF-8.
115 */
116 size_t
kiconv_fr_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)117 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
118 char **outbuf, size_t *outbytesleft, int *errno)
119 {
120 uchar_t *ib;
121 uchar_t *ob;
122 uchar_t *ibtail;
123 uchar_t *obtail;
124 size_t ret_val;
125 int8_t sz;
126 uint32_t gb_val;
127 boolean_t isgbk4;
128
129 /* Check on the kiconv code conversion descriptor. */
130 if (kcd == NULL || kcd == (void *)-1) {
131 *errno = EBADF;
132 return ((size_t)-1);
133 }
134
135 /* If this is a state reset request, process and return. */
136 if (inbuf == NULL || *inbuf == NULL) {
137 return (0);
138 }
139
140 ret_val = 0;
141 ib = (uchar_t *)*inbuf;
142 ob = (uchar_t *)*outbuf;
143 ibtail = ib + *inbytesleft;
144 obtail = ob + *outbytesleft;
145
146 while (ib < ibtail) {
147 if (KICONV_IS_ASCII(*ib)) {
148 if (ob >= obtail) {
149 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
150 }
151
152 *ob++ = *ib++;
153 continue;
154 }
155
156 /*
157 * Issue EILSEQ error if the first byte is not a
158 * valid GB18030 leading byte.
159 */
160 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
161 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
162 }
163
164 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
165 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
166
167 if (isgbk4) {
168 if (ibtail - ib < 4) {
169 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
170 }
171
172 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
173 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
174 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
175 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
176 }
177
178 gb_val = (uint32_t)(*ib) << 24 |
179 (uint32_t)(*(ib + 1)) << 16 |
180 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
181 } else {
182 if (ibtail - ib < 2) {
183 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
184 }
185
186 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
187 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
188 }
189
190 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
191 }
192
193 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
194 if (sz < 0) {
195 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
196 }
197
198 ib += isgbk4 ? 4 : 2;
199 ob += sz;
200 }
201
202 *inbuf = (char *)ib;
203 *inbytesleft = ibtail - ib;
204 *outbuf = (char *)ob;
205 *outbytesleft = obtail - ob;
206
207 return (ret_val);
208 }
209
210 /*
211 * String based encoding convertor from GB18030 to UTF-8.
212 */
213 size_t
kiconvstr_fr_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)214 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
215 size_t *outlen, int flag, int *errno)
216 {
217 uchar_t *ib;
218 uchar_t *ob;
219 uchar_t *ibtail;
220 uchar_t *obtail;
221 uchar_t *oldib;
222 size_t ret_val;
223 int8_t sz;
224 uint32_t gb_val;
225 boolean_t isgbk4;
226 boolean_t do_not_ignore_null;
227
228 ret_val = 0;
229 ib = (uchar_t *)inarray;
230 ob = (uchar_t *)outarray;
231 ibtail = ib + *inlen;
232 obtail = ob + *outlen;
233 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
234
235 while (ib < ibtail) {
236 if (*ib == '\0' && do_not_ignore_null)
237 break;
238
239 if (KICONV_IS_ASCII(*ib)) {
240 if (ob >= obtail) {
241 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
242 }
243
244 *ob++ = *ib++;
245 continue;
246 }
247
248 oldib = ib;
249
250 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
251 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
252 }
253
254 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
255 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
256
257 if (isgbk4) {
258 if (ibtail - ib < 4) {
259 if (flag & KICONV_REPLACE_INVALID) {
260 ib = ibtail;
261 goto REPLACE_INVALID;
262 }
263
264 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
265 }
266
267 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
268 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
269 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
270 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
271 }
272
273 gb_val = (uint32_t)(*ib) << 24 |
274 (uint32_t)(*(ib + 1)) << 16 |
275 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
276 } else {
277 if (ibtail - ib < 2) {
278 if (flag & KICONV_REPLACE_INVALID) {
279 ib = ibtail;
280 goto REPLACE_INVALID;
281 }
282
283 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
284 }
285
286 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
287 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
288 }
289
290 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
291 }
292
293 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
294 if (sz < 0) {
295 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
296 }
297
298 ib += isgbk4 ? 4 : 2;
299 ob += sz;
300 continue;
301
302 REPLACE_INVALID:
303 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
304 ib = oldib;
305 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
306 }
307
308 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
309 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
310 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
311 ret_val++;
312 }
313
314 *inlen = ibtail - ib;
315 *outlen = obtail - ob;
316
317 return (ret_val);
318 }
319
320 /*
321 * Encoding convertor from UTF-8 to GBK.
322 */
323 size_t
kiconv_to_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)324 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
325 char **outbuf, size_t *outbytesleft, int *errno)
326 {
327
328 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
329 outbytesleft, errno, utf8_to_gbk);
330 }
331
332 /*
333 * String based encoding convertor from UTF-8 to GBK.
334 */
335 size_t
kiconvstr_to_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)336 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
337 size_t *outlen, int flag, int *errno)
338 {
339 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
340 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
341 }
342
343 /*
344 * Encoding convertor from GBK to UTF-8.
345 */
346 size_t
kiconv_fr_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)347 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
348 char **outbuf, size_t *outbytesleft, int *errno)
349 {
350 uchar_t *ib;
351 uchar_t *ob;
352 uchar_t *ibtail;
353 uchar_t *obtail;
354 size_t ret_val;
355 int8_t sz;
356 uint32_t gb_val;
357
358 /* Check on the kiconv code conversion descriptor. */
359 if (kcd == NULL || kcd == (void *)-1) {
360 *errno = EBADF;
361 return ((size_t)-1);
362 }
363
364 /* If this is a state reset request, process and return. */
365 if (inbuf == NULL || *inbuf == NULL) {
366 return (0);
367 }
368
369 ret_val = 0;
370 ib = (uchar_t *)*inbuf;
371 ob = (uchar_t *)*outbuf;
372 ibtail = ib + *inbytesleft;
373 obtail = ob + *outbytesleft;
374
375 while (ib < ibtail) {
376 if (KICONV_IS_ASCII(*ib)) {
377 if (ob >= obtail) {
378 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
379 }
380
381 *ob++ = *ib++;
382 continue;
383 }
384
385 /*
386 * Issue EILSEQ error if the first byte is not a
387 * valid GBK leading byte.
388 */
389 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
390 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
391 }
392
393 /*
394 * Issue EINVAL error if input buffer has an incomplete
395 * character at the end of the buffer.
396 */
397 if (ibtail - ib < 2) {
398 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
399 }
400
401 /*
402 * Issue EILSEQ error if the remaining byte is not
403 * a valid GBK byte.
404 */
405 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
406 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
407 }
408
409 /* Now we have a valid GBK character. */
410 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
411 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
412
413 if (sz < 0) {
414 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
415 }
416
417 ib += 2;
418 ob += sz;
419 }
420
421 *inbuf = (char *)ib;
422 *inbytesleft = ibtail - ib;
423 *outbuf = (char *)ob;
424 *outbytesleft = obtail - ob;
425
426 return (ret_val);
427 }
428
429 /*
430 * String based encoding convertor from GBK to UTF-8.
431 */
432 size_t
kiconvstr_fr_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)433 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
434 size_t *outlen, int flag, int *errno)
435 {
436 uchar_t *ib;
437 uchar_t *ob;
438 uchar_t *ibtail;
439 uchar_t *obtail;
440 uchar_t *oldib;
441 size_t ret_val;
442 int8_t sz;
443 uint32_t gb_val;
444 boolean_t do_not_ignore_null;
445
446 ret_val = 0;
447 ib = (uchar_t *)inarray;
448 ob = (uchar_t *)outarray;
449 ibtail = ib + *inlen;
450 obtail = ob + *outlen;
451 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
452
453 while (ib < ibtail) {
454 if (*ib == '\0' && do_not_ignore_null)
455 break;
456
457 if (KICONV_IS_ASCII(*ib)) {
458 if (ob >= obtail) {
459 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
460 }
461
462 *ob++ = *ib++;
463 continue;
464 }
465
466 oldib = ib;
467
468 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
469 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
470 }
471
472 if (ibtail - ib < 2) {
473 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
474 }
475
476 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
477 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
478 }
479
480 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
481 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
482
483 if (sz < 0) {
484 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
485 }
486
487 ib += 2;
488 ob += sz;
489 continue;
490
491 REPLACE_INVALID:
492 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
493 ib = oldib;
494 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495 }
496
497 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
498 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
499 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
500 ret_val++;
501 }
502
503 *inlen = ibtail - ib;
504 *outlen = obtail - ob;
505
506 return (ret_val);
507 }
508
509 /*
510 * Encoding convertor from UTF-8 to EUC-CN.
511 */
512 size_t
kiconv_to_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)513 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
514 char **outbuf, size_t *outbytesleft, int *errno)
515 {
516 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
517 outbytesleft, errno, utf8_to_gb2312);
518 }
519
520 /*
521 * String based encoding convertor from UTF-8 to EUC-CN.
522 */
523 size_t
kiconvstr_to_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)524 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
525 size_t *outlen, int flag, int *errno)
526 {
527 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
528 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
529 }
530
531 /*
532 * Encoding converto from EUC-CN to UTF-8 code.
533 */
534 size_t
kiconv_fr_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)535 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
536 char **outbuf, size_t *outbytesleft, int *errno)
537 {
538 uchar_t *ib;
539 uchar_t *ob;
540 uchar_t *ibtail;
541 uchar_t *obtail;
542 size_t ret_val;
543 int8_t sz;
544
545 /* Check on the kiconv code conversion descriptor. */
546 if (kcd == NULL || kcd == (void *)-1) {
547 *errno = EBADF;
548 return ((size_t)-1);
549 }
550
551 /* If this is a state reset request, process and return. */
552 if (inbuf == NULL || *inbuf == NULL) {
553 return (0);
554 }
555
556 ret_val = 0;
557 ib = (uchar_t *)*inbuf;
558 ob = (uchar_t *)*outbuf;
559 ibtail = ib + *inbytesleft;
560 obtail = ob + *outbytesleft;
561
562 while (ib < ibtail) {
563 if (KICONV_IS_ASCII(*ib)) {
564 if (ob >= obtail) {
565 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
566 }
567
568 *ob++ = *ib++;
569 continue;
570 }
571
572 /*
573 * Issue EILSEQ error if the first byte is not a
574 * valid GB2312 leading byte.
575 */
576 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
577 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
578 }
579
580 /*
581 * Issue EINVAL error if input buffer has an incomplete
582 * character at the end of the buffer.
583 */
584 if (ibtail - ib < 2) {
585 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
586 }
587
588 /*
589 * Issue EILSEQ error if the remaining byte is not
590 * a valid GB2312 byte.
591 */
592 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
593 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
594 }
595
596 /* Now we have a valid GB2312 character */
597 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
598 if (sz < 0) {
599 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
600 }
601
602 ib += 2;
603 ob += sz;
604 }
605
606 *inbuf = (char *)ib;
607 *inbytesleft = ibtail - ib;
608 *outbuf = (char *)ob;
609 *outbytesleft = obtail - ob;
610
611 return (ret_val);
612 }
613
614 /*
615 * String based encoding convertor from EUC-CN to UTF-8.
616 */
617 size_t
kiconvstr_fr_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)618 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
619 size_t *outlen, int flag, int *errno)
620 {
621 uchar_t *ib;
622 uchar_t *ob;
623 uchar_t *ibtail;
624 uchar_t *obtail;
625 uchar_t *oldib;
626 size_t ret_val;
627 int8_t sz;
628 boolean_t do_not_ignore_null;
629
630 ret_val = 0;
631 ib = (uchar_t *)inarray;
632 ob = (uchar_t *)outarray;
633 ibtail = ib + *inlen;
634 obtail = ob + *outlen;
635 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
636
637 while (ib < ibtail) {
638 if (*ib == '\0' && do_not_ignore_null)
639 break;
640
641 if (KICONV_IS_ASCII(*ib)) {
642 if (ob >= obtail) {
643 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
644 }
645
646 *ob++ = *ib++;
647 continue;
648 }
649
650 oldib = ib;
651
652 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
653 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
654 }
655
656 if (ibtail - ib < 2) {
657 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
658 }
659
660 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
661 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
662 }
663
664 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
665 if (sz < 0) {
666 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
667 }
668
669 ib += 2;
670 ob += sz;
671 continue;
672
673 REPLACE_INVALID:
674 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
675 ib = oldib;
676 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
677 }
678
679 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
680 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
681 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
682 ret_val++;
683 }
684
685 *inlen = ibtail - ib;
686 *outlen = obtail - ob;
687
688 return (ret_val);
689 }
690
691 /*
692 * Convert single GB2312 character to UTF-8.
693 * Return: > 0 - Converted successfully
694 * = -1 - E2BIG
695 */
696 static int8_t
gb2312_to_utf8(uchar_t b1,uchar_t b2,uchar_t * ob,uchar_t * obtail,size_t * ret_val)697 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
698 size_t *ret_val)
699 {
700 size_t index;
701 int8_t sz;
702 uchar_t *u8;
703
704 /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705 index = b1 * 94 + b2 - 0x3BBF;
706
707 if (index >= KICONV_GB2312_UTF8_MAX)
708 index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
709
710 u8 = kiconv_gb2312_utf8[index];
711 sz = u8_number_of_bytes[u8[0]];
712
713 if (obtail - ob < sz) {
714 *ret_val = (size_t)-1;
715 return (-1);
716 }
717
718 for (index = 0; index < sz; index++)
719 *ob++ = u8[index];
720
721 /*
722 * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
723 * elements, so need to ckeck more.
724 */
725 if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
726 u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
727 u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
728 u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
729 (*ret_val)++;
730
731 return (sz);
732 }
733
734 /*
735 * Convert single GB18030 or GBK character to UTF-8.
736 * Return: > 0 - Converted successfully
737 * = -1 - E2BIG
738 */
739 static int8_t
gbk_to_utf8(uint32_t gbk_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,boolean_t isgbk4)740 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
741 boolean_t isgbk4)
742 {
743 size_t index;
744 int8_t sz;
745 uchar_t u8array[4];
746 uchar_t *u8;
747
748 if (isgbk4) {
749 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
750 uint32_t u32;
751
752 /*
753 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754 * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755 * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756 * (gbk_val & 0xFF - 0x30)+
757 * KICONV_SC_PLANE1_UCS4_START;
758 */
759 u32 = (gbk_val >> 24) * 12600 +
760 ((gbk_val & 0xFF0000) >> 16) * 1260 +
761 ((gbk_val & 0xFF00) >> 8) * 10 +
762 (gbk_val & 0xFF) - 0x1BA0FA;
763 u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
764 u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
765 u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
766 u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
767 u8 = u8array;
768 index = 1;
769 } else {
770 index = kiconv_binsearch(gbk_val,
771 kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
772 u8 = kiconv_gbk4_utf8[index].u8;
773 }
774 } else {
775 index = kiconv_binsearch(gbk_val,
776 kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
777 u8 = kiconv_gbk_utf8[index].u8;
778 }
779
780 sz = u8_number_of_bytes[u8[0]];
781 if (obtail - ob < sz) {
782 *ret_val = (size_t)-1;
783 return (-1);
784 }
785
786 if (index == 0)
787 (*ret_val)++; /* Non-identical conversion */
788
789 for (index = 0; index < sz; index++)
790 *ob++ = u8[index];
791
792 return (sz);
793 }
794
795 /*
796 * Convert single UTF-8 character to GB18030.
797 * Return: > 0 - Converted successfully
798 * = -1 - E2BIG
799 */
800 /* ARGSUSED */
801 static int8_t
utf8_to_gb18030(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)802 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
803 uchar_t *ob, uchar_t *obtail, size_t *ret)
804 {
805 size_t index;
806 int8_t gbklen;
807 uint32_t gbkcode;
808
809 if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
810 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
811 uint32_t u32;
812
813 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
814 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
815 KICONV_SC_PLANE1_UCS4_START;
816 gbkcode = ((u32 / 12600 + 0x90) << 24) |
817 (((u32 % 12600) / 1260 + 0x30) << 16) |
818 (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
819 gbklen = 4;
820 index = 1;
821 } else {
822 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
823 KICONV_UTF8_GB18030_MAX);
824 gbkcode = kiconv_utf8_gb18030[index].value;
825 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
826 }
827
828 if (obtail - ob < gbklen) {
829 *ret = (size_t)-1;
830 return (-1);
831 }
832
833 if (index == 0)
834 (*ret)++; /* Non-identical conversion */
835
836 if (gbklen == 2) {
837 *ob++ = (uchar_t)(gbkcode >> 8);
838 } else if (gbklen == 4) {
839 *ob++ = (uchar_t)(gbkcode >> 24);
840 *ob++ = (uchar_t)(gbkcode >> 16);
841 *ob++ = (uchar_t)(gbkcode >> 8);
842 }
843 *ob = (uchar_t)(gbkcode & 0xFF);
844
845 return (gbklen);
846 }
847
848 /*
849 * Convert single UTF-8 character to GBK.
850 * Return: > 0 - Converted successfully
851 * = -1 - E2BIG
852 */
853 /* ARGSUSED */
854 static int8_t
utf8_to_gbk(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)855 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
856 uchar_t *ob, uchar_t *obtail, size_t *ret)
857 {
858 size_t index;
859 int8_t gbklen;
860 uint32_t gbkcode;
861
862 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
863 KICONV_UTF8_GB18030_MAX);
864 gbkcode = kiconv_utf8_gb18030[index].value;
865 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
866
867 /* GBK and GB18030 share the same table, so check the length. */
868 if (gbklen == 4) {
869 index = 0;
870 gbkcode = kiconv_utf8_gb18030[index].value;
871 gbklen = 1;
872 }
873
874 if (obtail - ob < gbklen) {
875 *ret = (size_t)-1;
876 return (-1);
877 }
878
879 if (index == 0)
880 (*ret)++; /* Non-identical conversion */
881
882 if (gbklen > 1)
883 *ob++ = (uchar_t)(gbkcode >> 8);
884 *ob = (uchar_t)(gbkcode & 0xFF);
885
886 return (gbklen);
887 }
888
889 /*
890 * Convert single UTF-8 character to GB2312.
891 * Return: > 0 - Converted successfully
892 * = -1 - E2BIG
893 */
894 /* ARGSUSED */
895 static int8_t
utf8_to_gb2312(uint32_t utf8,uchar_t ** inbuf,uchar_t * intail,uchar_t * ob,uchar_t * obtail,size_t * ret)896 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
897 uchar_t *ob, uchar_t *obtail, size_t *ret)
898 {
899 size_t index;
900 int8_t gblen;
901 uint32_t gbcode;
902
903 index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
904 KICONV_UTF8_GB2312_MAX);
905 gbcode = kiconv_utf8_gb2312[index].value;
906 gblen = (gbcode <= 0xFF) ? 1 : 2;
907
908 if (obtail - ob < gblen) {
909 *ret = (size_t)-1;
910 return (-1);
911 }
912
913 if (index == 0)
914 (*ret)++;
915
916 if (gblen > 1)
917 *ob++ = (uchar_t)(gbcode >> 8);
918 *ob = (uchar_t)(gbcode & 0xFF);
919
920 return (gblen);
921 }
922
/*
 * Conversions provided by this module: both directions for each of
 * GB18030, GBK and EUC-CN.  Judging by how the strings pair up with the
 * functions, each entry is { to-code, from-code, open, convert, close,
 * string-convert }; the field names belong to kiconv_ops_t, which is
 * declared elsewhere.
 */
static kiconv_ops_t kiconv_sc_ops_tbl[] = {
	{
		/* UTF-8 to GB18030 */
		"gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
		kiconv_close_to_cck, kiconvstr_to_gb18030
	},
	{
		/* GB18030 to UTF-8 */
		"utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
		close_fr_sc, kiconvstr_fr_gb18030
	},
	{
		/* UTF-8 to GBK */
		"gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
		kiconv_close_to_cck, kiconvstr_to_gbk
	},
	{
		/* GBK to UTF-8 */
		"utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
		close_fr_sc, kiconvstr_fr_gbk
	},
	{
		/* UTF-8 to EUC-CN */
		"euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
		kiconv_close_to_cck, kiconvstr_to_euccn
	},
	{
		/* EUC-CN to UTF-8 */
		"utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
		close_fr_sc, kiconvstr_fr_euccn
	},
};

/*
 * Module description handed to the kiconv framework through
 * modlkiconv_sc below.  The trailing fields are left zero/NULL; their
 * meaning is defined by kiconv_module_info_t, declared elsewhere.
 */
static kiconv_module_info_t kiconv_sc_info = {
	"kiconv_sc",		/* module name */
	/* number of entries in the conversion table */
	sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
	kiconv_sc_ops_tbl,	/* the conversion table itself */
	0,
	NULL,
	NULL,
	0
};

/* Linkage element tying the module description to mod_kiconvops. */
static struct modlkiconv modlkiconv_sc = {
	&mod_kiconvops,
	"kiconv Simplified Chinese module 1.0",
	&kiconv_sc_info
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlkiconv_sc,
	NULL
};
971
972 int
_init(void)973 _init(void)
974 {
975 int err;
976
977 err = mod_install(&modlinkage);
978 if (err)
979 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
980
981 return (err);
982 }
983
984 int
_fini(void)985 _fini(void)
986 {
987 int err;
988
989 /*
990 * If this module is being used, then, we cannot remove the module.
991 * The following checking will catch pretty much all usual cases.
992 *
993 * Any remaining will be catached by the kiconv_unregister_module()
994 * during mod_remove() at below.
995 */
996 if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
997 return (EBUSY);
998
999 err = mod_remove(&modlinkage);
1000 if (err)
1001 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1002
1003 return (err);
1004 }
1005
1006 int
_info(struct modinfo * modinfop)1007 _info(struct modinfo *modinfop)
1008 {
1009 return (mod_info(&modlinkage, modinfop));
1010 }
1011