xref: /illumos-gate/usr/src/lib/iconv_modules/ko/common/common_utf.c (revision 1bff1300cebf1ea8e11ce928b10e208097e67f24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1996 by Sun Microsystems, Inc.
23  */
24 
25 #include "common_han.h"
26 #include "common_utf.h"
27 
/*
 * Unicode code range that User Defined Characters (UDC) are mapped onto.
 * _udcidx_to_utf() adds a UDC index to UNICODE_UDC_START, and
 * _utf_to_udcidx() subtracts UNICODE_UDC_START again after checking that
 * the value lies within [UNICODE_UDC_START, UNICODE_UDC_END].
 *
 * UNICODE_UDC_MAX is END - START, i.e. the largest zero-based index that
 * is accepted (an inclusive upper bound, not a count of entries).
 */
28 #define UNICODE_UDC_START	0xF700
29 #define UNICODE_UDC_END		0xF8FF
30 #define UNICODE_UDC_MAX		(UNICODE_UDC_END - UNICODE_UDC_START)
31 
32 /****  _ U N I _ T O _ U T F 8  ****/
33 
34 hcode_type _uni_to_utf8(hcode_type unicode)
35 {
36         hcode_type utf8;
37 	utf8.code = 0;
38 
39 	/* new code */
40 	if(unicode.code <= 0x007F){
41 		utf8.code = unicode.code;
42 	}
43 	else if(unicode.code >= 0x0080 && unicode.code <= 0x07FF){
44 		utf8.utf8.high8bits = 0x00;
45 		utf8.utf8.sign1 = 0x00;
46 		utf8.utf8.data1 = 0x00;
47 		utf8.utf8.sign2 = 0x03;
48 		utf8.utf8.data2 = unicode.unicode.data2;
49 		utf8.utf8.sign3 = 0x02;
50 		utf8.utf8.data3 = unicode.unicode.data3;
51 	}
52 	else if(unicode.code >= 0x0800 && unicode.code < 0xFFFF){
53 		utf8.utf8.high8bits = 0x00;
54 		utf8.utf8.sign1 = 0x0E;
55 		utf8.utf8.data1 = unicode.unicode.data1;
56 		utf8.utf8.sign2 = 0x02;                  /* 10xxxxxx */
57 		utf8.utf8.data2 = unicode.unicode.data2;
58 		utf8.utf8.sign3 = 0x02;                  /* 10xxxxxx */
59 		utf8.utf8.data3 = unicode.unicode.data3;
60 	}
61 
62         return(utf8);
63 
64 }  /* end of hcode_type _uni_to_utf8(hcode_type uni_code) */
65 
66 /****  _ U T F 8 _ T O _ U N I  ****/
67 
68 hcode_type _utf8_to_uni(hcode_type utf8)
69 {
70 	hcode_type unicode;
71 	unicode.code = 0;
72 
73         if(utf8.byte.byte3 == 0 && utf8.byte.byte2 ==0)
74 	{/* ASCII */
75 		unicode.byte.byte1 = 0;
76 		unicode.byte.byte2 = 0;
77                 unicode.byte.byte3 = 0;
78 		unicode.byte.byte4 = utf8.byte.byte4;
79 		return(unicode);
80 	}
81 
82 	if(utf8.byte.byte2 == 0){
83 	/* 2-byte UTF-8 */
84 		unicode.byte.byte1 = 0;
85 		unicode.byte.byte2 = 0;
86 		unicode.byte.byte3 = (utf8.byte.byte3 & 0x3F) >> 2;
87 		unicode.byte.byte4 = (utf8.byte.byte3 << 6) | (0x3F & utf8.byte.byte4);
88 	}
89 	else {
90 	/* 3-byte UTF-8 */
91 		unicode.unicode.data1 = utf8.utf8.data1;
92 		unicode.unicode.data2 = utf8.utf8.data2;
93 		unicode.unicode.data3 = utf8.utf8.data3;
94 	}
95 	return (unicode);
96 
97 }  /* end of hcode_type _utf8_to_uni(hcode_type utf8) */
98 
99 /*  Return UTF-8 code from given User Defined Character Index(Serial Number) */
100 hcode_type _udcidx_to_utf(int udcidx)
101 {
102 	hcode_type unicode, utf8;
103 
104 	if (udcidx < 0 || UNICODE_UDC_MAX < udcidx)
105 		utf8.code = UTF_UDC_ERROR;	/* over the UDC bound */
106 	else {
107 		unicode.code = UNICODE_UDC_START + udcidx;
108 		utf8 = _uni_to_utf8(unicode);
109 	}
110 
111 	return(utf8);
112 }
113 
114 /*  Return User Defined Character Index(Serial Number) from given UTF-8 code */
115 int _utf_to_udcidx(hcode_type utf_code)
116 {
117 	hcode_type unicode;
118 
119 	unicode = _utf8_to_uni(utf_code);
120 
121 	if (unicode.code < UNICODE_UDC_START || UNICODE_UDC_END < unicode.code)
122 		return(IDX_UDC_ERROR);
123 	else
124 		return((int)(unicode.code - UNICODE_UDC_START));
125 }
126