1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/debug.h>
32 #include <sys/kmem.h>
33 #include <sys/sunddi.h>
34 #include <sys/byteorder.h>
35 #include <sys/errno.h>
36 #include <sys/modctl.h>
37 #include <sys/u8_textprep.h>
38 #include <sys/kiconv.h>
39 #include <sys/kiconv_cck_common.h>
40 #include <sys/kiconv_ko.h>
41 #include <sys/kiconv_uhc_utf8.h>
42 #include <sys/kiconv_utf8_uhc.h>
43 #include <sys/kiconv_euckr_utf8.h>
44 #include <sys/kiconv_utf8_euckr.h>
45
46 static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
47 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
48 static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
49 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
50 static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
51 size_t *ret_val, kiconv_table_array_t *table, size_t nitems);
52
53
/*
 * Conversion descriptors for the "from" direction (EUC-KR or UHC to UTF-8).
 *
 * The open routines below do not allocate anything; they hand back one of
 * these small, non-zero magic numbers cast to a pointer.  Zero is
 * deliberately not a valid id so that a NULL descriptor can always be
 * recognized as invalid.
 */
#define	KICONV_KO_EUCKR			(0x01)
#define	KICONV_KO_UHC			(0x02)
#define	KICONV_KO_MAX_MAGIC_ID		(0x02)

/*
 * Open routine for the EUC-KR to UTF-8 conversion.
 * Returns the EUC-KR magic id as the conversion descriptor.
 */
static void *
open_fr_euckr(void)
{
	return ((void *)(uintptr_t)KICONV_KO_EUCKR);
}

/*
 * Open routine for the Unified Hangul Code to UTF-8 conversion.
 * Returns the UHC magic id as the conversion descriptor.
 */
static void *
open_fr_uhc(void)
{
	return ((void *)(uintptr_t)KICONV_KO_UHC);
}

/*
 * Close routine shared by both "from" conversions.
 *
 * Nothing has to be released; the descriptor is only validated.  Returns 0
 * when `s' is one of the magic ids issued by the open routines above and
 * EBADF otherwise.  That includes NULL (id 0), which no open routine ever
 * returns and which the conversion routines reject as well.
 */
static int
close_fr_ko(void *s)
{
	uintptr_t id = (uintptr_t)s;

	if (id < KICONV_KO_EUCKR || id > KICONV_KO_MAX_MAGIC_ID)
		return (EBADF);

	return (0);
}
78
79 /*
80 * Encoding convertor from EUC-KR to UTF-8.
81 */
82 static size_t
kiconv_fr_euckr(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)83 kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
84 char **outbuf, size_t *outbufleft, int *errno)
85 {
86 uchar_t *ib;
87 uchar_t *ob;
88 uchar_t *ibtail;
89 uchar_t *obtail;
90 size_t ret_val;
91 int8_t sz;
92 uint32_t euckr_val;
93
94 /* Check on the kiconv code conversion descriptor. */
95 if (kcd == NULL || kcd == (void *)-1) {
96 *errno = EBADF;
97 return ((size_t)-1);
98 }
99
100 /* If this is a state reset request, process and return. */
101 if (inbuf == NULL || *inbuf == NULL) {
102 return (0);
103 }
104
105 ret_val = 0;
106 ib = (uchar_t *)*inbuf;
107 ob = (uchar_t *)*outbuf;
108 ibtail = ib + *inbufleft;
109 obtail = ob + *outbufleft;
110
111 while (ib < ibtail) {
112 if (KICONV_IS_ASCII(*ib)) {
113 if (ob >= obtail) {
114 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
115 }
116
117 *ob++ = *ib++;
118 continue;
119 }
120
121 /*
122 * Issue EILSEQ error if the first byte is not a
123 * valid EUC-KR leading byte.
124 */
125 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
126 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
127 }
128
129 /*
130 * Issue EINVAL error if input buffer has an incomplete
131 * character at the end of the buffer.
132 */
133 if (ibtail - ib < 2) {
134 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
135 }
136
137 /*
138 * Issue EILSEQ error if the remaining byte is not
139 * a valid EUC-KR byte.
140 */
141 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
142 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
143 }
144
145 euckr_val = (uint32_t)(*ib) << 8 | *(ib + 1);
146 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
147 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
148
149 if (sz < 0) {
150 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
151 }
152
153 ib += 2;
154 ob += sz;
155 }
156
157 *inbuf = (char *)ib;
158 *inbufleft = ibtail - ib;
159 *outbuf = (char *)ob;
160 *outbufleft = obtail - ob;
161
162 return (ret_val);
163 }
164
165 /*
166 * String based encoding convertor from EUC-KR to UTF-8.
167 */
168 static size_t
kiconvstr_fr_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)169 kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray,
170 size_t *outlen, int flag, int *errno)
171 {
172 uchar_t *ib;
173 uchar_t *ob;
174 uchar_t *ibtail;
175 uchar_t *obtail;
176 uchar_t *oldib;
177 size_t ret_val;
178 int8_t sz;
179 uint32_t euckr_val;
180 boolean_t do_not_ignore_null;
181
182 ret_val = 0;
183 ib = (uchar_t *)inarray;
184 ob = (uchar_t *)outarray;
185 ibtail = ib + *inlen;
186 obtail = ob + *outlen;
187 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
188
189 while (ib < ibtail) {
190 if (*ib == '\0' && do_not_ignore_null)
191 break;
192
193 if (KICONV_IS_ASCII(*ib)) {
194 if (ob >= obtail) {
195 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
196 }
197
198 *ob++ = *ib++;
199 continue;
200 }
201
202 oldib = ib;
203
204 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
205 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
206 }
207
208 if (ibtail - ib < 2) {
209 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
210 }
211
212 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
213 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
214 }
215
216 euckr_val = *ib++;
217 euckr_val = (euckr_val << 8) | *ib++;
218 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
219 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
220
221 if (sz < 0) {
222 ib = oldib;
223 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
224 }
225
226 ob += sz;
227 continue;
228
229 REPLACE_INVALID:
230 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
231 ib = oldib;
232 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
233 }
234
235 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
236 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
237 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
238 ret_val++;
239 }
240
241 *inlen = ibtail - ib;
242 *outlen = obtail - ob;
243
244 return (ret_val);
245 }
246
247 /*
248 * Encoding convertor from Unified Hangul Code to UTF-8.
249 */
250 static size_t
kiconv_fr_uhc(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)251 kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft,
252 char **outbuf, size_t *outbufleft, int *errno)
253 {
254 uchar_t *ib;
255 uchar_t *ob;
256 uchar_t *ibtail;
257 uchar_t *obtail;
258 size_t ret_val;
259 int8_t sz;
260 uint32_t uhc_val;
261
262 /* Check on the kiconv code conversion descriptor. */
263 if (kcd == NULL || kcd == (void *)-1) {
264 *errno = EBADF;
265 return ((size_t)-1);
266 }
267
268 /* If this is a state reset request, process and return. */
269 if (inbuf == NULL || *inbuf == NULL) {
270 return (0);
271 }
272
273 ret_val = 0;
274 ib = (uchar_t *)*inbuf;
275 ob = (uchar_t *)*outbuf;
276 ibtail = ib + *inbufleft;
277 obtail = ob + *outbufleft;
278
279 while (ib < ibtail) {
280 if (KICONV_IS_ASCII(*ib)) {
281 if (ob >= obtail) {
282 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
283 }
284
285 *ob++ = *ib++;
286 continue;
287 }
288
289 /*
290 * Issue EILSEQ error if the first byte is not a
291 * valid UHC leading byte.
292 */
293 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
294 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
295 }
296
297 /*
298 * Issue EINVAL error if input buffer has an incomplete
299 * character at the end of the buffer.
300 */
301 if (ibtail - ib < 2) {
302 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
303 }
304
305 /*
306 * Issue EILSEQ error if the remaining byte is not
307 * a valid UHC byte.
308 */
309 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
310 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
311 }
312
313 uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1);
314 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
315 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
316
317 if (sz < 0) {
318 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
319 }
320
321 ib += 2;
322 ob += sz;
323 }
324
325 *inbuf = (char *)ib;
326 *inbufleft = ibtail - ib;
327 *outbuf = (char *)ob;
328 *outbufleft = obtail - ob;
329
330 return (ret_val);
331 }
332
333 /*
334 * String based encoding convertor from Unified Hangul Code to UTF-8.
335 */
336 static size_t
kiconvstr_fr_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)337 kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray,
338 size_t *outlen, int flag, int *errno)
339 {
340 uchar_t *ib;
341 uchar_t *ob;
342 uchar_t *ibtail;
343 uchar_t *obtail;
344 uchar_t *oldib;
345 size_t ret_val;
346 int8_t sz;
347 uint32_t uhc_val;
348 boolean_t do_not_ignore_null;
349
350 ret_val = 0;
351 ib = (uchar_t *)inarray;
352 ob = (uchar_t *)outarray;
353 ibtail = ib + *inlen;
354 obtail = ob + *outlen;
355 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
356
357 while (ib < ibtail) {
358 if (*ib == '\0' && do_not_ignore_null)
359 break;
360
361 if (KICONV_IS_ASCII(*ib)) {
362 if (ob >= obtail) {
363 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
364 }
365
366 *ob++ = *ib++;
367 continue;
368 }
369
370 oldib = ib;
371
372 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
374 }
375
376 if (ibtail - ib < 2) {
377 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
378 }
379
380 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
381 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
382 }
383
384 uhc_val = *ib++;
385 uhc_val = (uhc_val << 8) | *ib++;
386 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
387 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
388
389 if (sz < 0) {
390 ib = oldib;
391 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
392 }
393
394 ob += sz;
395 continue;
396
397 REPLACE_INVALID:
398 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
399 ib = oldib;
400 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
401 }
402
403 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
404 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
405 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
406 ret_val++;
407 }
408
409 *inlen = ibtail - ib;
410 *outlen = obtail - ob;
411
412 return (ret_val);
413 }
414
415 /*
416 * Encoding convertor from UTF-8 to EUC-KR.
417 */
418 static size_t
kiconv_to_euckr(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)419 kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft,
420 char **outbuf, size_t *outbytesleft, int *errno)
421 {
422 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
423 outbytesleft, errno, utf8_to_euckr));
424 }
425
426 /*
427 * Encoding convertor from UTF-8 to Unified Hangul Code.
428 */
429 static size_t
kiconv_to_uhc(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)430 kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft,
431 char **outbuf, size_t *outbytesleft, int *errno)
432 {
433 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
434 outbytesleft, errno, utf8_to_uhc));
435 }
436
437 /*
438 * String based encoding convertor from UTF-8 to EUC-KR.
439 */
440 static size_t
kiconvstr_to_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)441 kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray,
442 size_t *outlen, int flag, int *errno)
443 {
444 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
445 (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr);
446 }
447
448 /*
449 * String based encoding convertor from UTF-8 to Unified Hangul Code.
450 */
451 static size_t
kiconvstr_to_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)452 kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray,
453 size_t *outlen, int flag, int *errno)
454 {
455 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
456 (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc);
457 }
458
459 /*
460 * Convert an UTF-8 character to a character of ko encodings
461 * (EUC-KR or UHC).
462 */
463 static int8_t
utf8_to_ko(uint32_t utf8,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_t * table,size_t nitems)464 utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
465 kiconv_table_t *table, size_t nitems)
466 {
467 size_t index;
468 size_t kocode;
469 int8_t kolen;
470
471 if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) {
472 /* User Definable Area handing. */
473 kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) |
474 (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START;
475 if (kocode < KICONV_KO_UDA_RANGE) {
476 kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) |
477 (kocode + KICONV_KO_UDA_OFFSET_START);
478 } else {
479 /* 0x43 = 0xA1 - 0x5E */
480 kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) |
481 (kocode + 0x43);
482 }
483
484 index = 1;
485 } else {
486 index = kiconv_binsearch(utf8, table, nitems);
487 kocode = table[index].value;
488 }
489
490 kolen = (kocode <= 0xFF) ? 1 : 2;
491
492 if (obtail - ob < kolen) {
493 *ret_val = (size_t)-1;
494 return (-1);
495 }
496
497 if (index == 0)
498 (*ret_val)++;
499
500 if (kolen > 1)
501 *ob++ = (uchar_t)(kocode >> 8);
502 *ob = (uchar_t)(kocode & 0xFF);
503
504 return (kolen);
505 }
506
507 /*
508 * Convert an UTF-8 character to Unified Hangual Code.
509 */
510 /* ARGSUSED */
511 static int8_t
utf8_to_uhc(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)512 utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
513 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
514 {
515 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc,
516 KICONV_UTF8_UHC_MAX));
517 }
518
519 /*
520 * Convert an UTF-8 character to EUC-KR.
521 */
522 /* ARGSUSED */
523 static int8_t
utf8_to_euckr(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)524 utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
525 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
526 {
527 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr,
528 KICONV_UTF8_EUCKR_MAX));
529 }
530
531 /*
532 * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
533 */
534 static int8_t
ko_to_utf8(uint32_t ko_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_array_t * table,size_t nitems)535 ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
536 kiconv_table_array_t *table, size_t nitems)
537 {
538 size_t index;
539 int8_t sz;
540 uchar_t udc[3];
541 uchar_t *u8;
542
543 if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) {
544 /* UDA(User Definable Area) handling. */
545 uint32_t u32;
546
547 u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ?
548 KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2);
549 udc[0] = 0xEF;
550 udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6);
551 udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F));
552 u8 = udc;
553 index = 1;
554 } else {
555 index = kiconv_binsearch(ko_val, table, nitems);
556 u8 = table[index].u8;
557 }
558
559 sz = u8_number_of_bytes[u8[0]];
560
561 if (obtail - ob < sz) {
562 *ret_val = (size_t)-1;
563 return (-1);
564 }
565
566 if (index == 0)
567 (*ret_val)++; /* Non-identical conversion */
568
569 for (index = 0; index < sz; index++)
570 *ob++ = u8[index];
571
572 return (sz);
573 }
574
575 static kiconv_ops_t kiconv_ko_ops_tbl[] = {
576 {
577 "euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr,
578 kiconv_close_to_cck, kiconvstr_to_euckr
579 },
580 {
581 "utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr,
582 close_fr_ko, kiconvstr_fr_euckr
583 },
584 {
585 "unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc,
586 kiconv_close_to_cck, kiconvstr_to_uhc
587 },
588 {
589 "utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc,
590 close_fr_ko, kiconvstr_fr_uhc
591 }
592 };
593
594 static kiconv_module_info_t kiconv_ko_info = {
595 "kiconv_ko", /* module name */
596 sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]),
597 kiconv_ko_ops_tbl,
598 0,
599 NULL,
600 NULL,
601 0
602 };
603
604 static struct modlkiconv modlkiconv_ko = {
605 &mod_kiconvops,
606 "kiconv korean module 1.0",
607 &kiconv_ko_info
608 };
609
610 static struct modlinkage modlinkage = {
611 MODREV_1,
612 (void *)&modlkiconv_ko,
613 NULL
614 };
615
616 int
_init(void)617 _init(void)
618 {
619 int err;
620
621 err = mod_install(&modlinkage);
622 if (err)
623 cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module");
624
625 return (err);
626 }
627
628 int
_fini(void)629 _fini(void)
630 {
631 int err;
632
633 /*
634 * If this module is being used, then, we cannot remove the module.
635 * The following checking will catch pretty much all usual cases.
636 *
637 * Any remaining will be catached by the kiconv_unregister_module()
638 * during mod_remove() at below.
639 */
640 if (kiconv_module_ref_count(KICONV_MODULE_ID_KO))
641 return (EBUSY);
642
643 err = mod_remove(&modlinkage);
644 if (err)
645 cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module");
646
647 return (err);
648 }
649
650 int
_info(struct modinfo * modinfop)651 _info(struct modinfo *modinfop)
652 {
653 return (mod_info(&modlinkage, modinfop));
654 }
655