xref: /illumos-gate/usr/src/lib/iconv_modules/zh/common/uni_common.c (revision bde334a8dbd66dfa70ce4d7fc9dcad6e1ae45fe4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright(c) 2001 Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 #if defined UCS_2LE
27 static int unichr_to_ucs_2le (st, unichr, buf, buflen, uconv_num)
28 _iconv_st *st;
29 unsigned long unichr;
30 char	*buf;
31 size_t	buflen;
32 int	*uconv_num;
33 {
34 	int size = 0;
35 
36 	if (unichr > 0x00ffff) {
37 	  unichr = ICV_CHAR_UCS2_REPLACEMENT;
38 	  *uconv_num = 1;
39 	}
40 
41 	if (!st->bom_written) {
42 	  if (buflen < 4)
43 	    return 0;
44 
45 	  *(buf + size++) = (uchar_t)0xff;
46 	  *(buf + size++) = (uchar_t)0xfe;
47 	  st->bom_written = true;
48 	}
49 
50 	if (buflen < 2)
51 	  return 0;
52 
53 	*(buf + size++) = (uchar_t)(unichr & 0xff);
54 	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
55 
56 	return size;
57 }
58 
59 #elif defined UCS_2BE
60 static int unichr_to_ucs_2be (st, unichr, buf, buflen, uconv_num)
61 _iconv_st *st;
62 unsigned long unichr;
63 char	*buf;
64 size_t	buflen;
65 int	*uconv_num;
66 {
67 	int size = 0;
68 
69 	if (unichr > 0x00ffff) {
70 	  unichr = ICV_CHAR_UCS2_REPLACEMENT;
71 	  *uconv_num = 1;
72 	}
73 
74 	if (!st->bom_written) {
75 	  if (buflen < 4)
76 	    return 0;
77 
78 	  *(buf + size++) = (uchar_t)0xfe;
79 	  *(buf + size++) = (uchar_t)0xff;
80 	  st->bom_written = true;
81 	}
82 
83 	if (buflen < 2)
84 	  return 0;
85 
86 	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
87 	*(buf + size++) = (uchar_t)(unichr & 0xff);
88 
89 	return size;
90 }
91 
92 #elif defined UCS_4LE
93 static int unichr_to_ucs_4le (st, unichr, buf, buflen, uconv_num)
94 _iconv_st *st;
95 unsigned long unichr;
96 char	*buf;
97 size_t	buflen;
98 int	*uconv_num;
99 {
100 	int size = 0;
101 
102 	if (unichr == 0xffffffff) {
103 	  unichr = ICV_CHAR_UCS2_REPLACEMENT;
104 	  *uconv_num = 1;
105 	}
106 
107 	if (!st->bom_written) {
108 	  if (buflen < 8)
109 	    return 0;
110 
111 	  *(buf + size++) = (uchar_t)0xff;
112 	  *(buf + size++) = (uchar_t)0xfe;
113 	  *(buf + size++) = (uchar_t)0;
114 	  *(buf + size++) = (uchar_t)0;
115 	  st->bom_written = true;
116 	}
117 
118 	if (buflen < 4)
119 	  return 0;
120 
121 	*(buf + size++) = (uchar_t)(unichr & 0xff);
122 	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
123 	*(buf + size++) = (uchar_t)((unichr >> 16) & 0xff);
124 	*(buf + size++) = (uchar_t)((unichr >> 24) & 0xff);
125 
126 	return size;
127 }
128 
129 #elif defined UCS_4BE
130 static int unichr_to_ucs_4be (st, unichr, buf, buflen, uconv_num)
131 _iconv_st *st;
132 unsigned long unichr;
133 char	*buf;
134 size_t	buflen;
135 int	*uconv_num;
136 {
137 	int size = 0;
138 
139 	if (unichr == 0xffffffff) {
140 	  unichr = ICV_CHAR_UCS2_REPLACEMENT;
141 	  *uconv_num = 1;
142 	}
143 
144 	if (!st->bom_written) {
145 	  if (buflen < 8)
146 	    return 0;
147 
148 	  *(buf + size++) = (uchar_t)0;
149 	  *(buf + size++) = (uchar_t)0;
150 	  *(buf + size++) = (uchar_t)0xfe;
151 	  *(buf + size++) = (uchar_t)0xff;
152 	  st->bom_written = true;
153 	}
154 
155 	if (buflen < 4)
156 	  return 0;
157 
158 	*(buf + size++) = (uchar_t)((unichr >> 24) & 0xff);
159 	*(buf + size++) = (uchar_t)((unichr >> 16) & 0xff);
160 	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
161 	*(buf + size++) = (uchar_t)(unichr & 0xff);
162 
163 	return size;
164 }
165 
166 #else
167 static int unichr_to_utf8(_iconv_st *st, unsigned long unichr, char *buf,
168 	size_t buflen, int *uconv_num)
169 {
170         if (unichr < 0x0080) {
171                 if (buflen < 1) {
172                         errno = E2BIG;
173                         return(0);
174                 }
175                 *buf = (char) unichr;
176                 return(1);
177         }
178 
179 	if (unichr >= 0x0080 && unichr <= 0x07ff) {
180 		if (buflen < 2) {
181 			errno = E2BIG;
182 			return(0);
183 		}
184 		*buf = (char)((unichr >> 6) & 0x1f) | 0xc0;
185 		*(buf+1) = (char)(unichr & 0x3f) | 0x80;
186 		return(2);
187 	}
188 
189 	if (unichr >= 0x0800 && unichr <= 0xffff) {
190 		if (buflen < 3) {
191 			errno = E2BIG;
192 			return(0);
193 		}
194 		*buf = (char)((unichr >> 12) & 0xf) | 0xe0;
195 		*(buf+1) = (char)((unichr >>6) & 0x3f) | 0x80;
196 		*(buf+2) = (char)(unichr & 0x3f) | 0x80;
197 		return(3);
198 	}
199 
200 	if (unichr >= 0x10000 && unichr <= 0x10ffff) {
201 	        if (buflen < 4) {
202 		     errno = E2BIG;
203 		     return(0);
204 		}
205 
206 	        *buf = (char)((unichr >> 18) & 0x7) | 0xf0;
207 	        *(buf+1) = (char)((unichr >> 12) & 0x3f) | 0x80;
208 	        *(buf+2) = (char)((unichr >>6) & 0x3f) | 0x80;
209 	        *(buf+3) = (char)(unichr & 0x3f) | 0x80;
210 	        return(4);
211 	}
212 
213 	/* unrecognized unicode character */
214 	if (buflen < 3) {
215 		errno = E2BIG;
216 		return(0);
217 	}
218 	*buf = (char)UTF8_NON_ID_CHAR1;
219 	*(buf+1) = (char)UTF8_NON_ID_CHAR2;
220 	*(buf+2) = (char)UTF8_NON_ID_CHAR3;
221 
222         /* non-identical conversions */
223         *uconv_num = 1;
224 
225 	return(3);
226 }
227 #endif
228 
229 /*
230 vi:ts=8:ai:expandtab
231 */
232