xref: /titanic_50/usr/src/lib/iconv_modules/ja/common/Shift_JIS-2004_TO_Unicode.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25 
26 
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31 #include "jfp_iconv_unicode.h"
32 
33 #define	JFP_J2U_ICONV_X0213
34 #include "jfp_jis_to_ucs2.h"
35 
/*
 * Pair of row numbers selected by one Shift_JIS lead byte.  The trail
 * byte decides which member applies: a trail byte in 0x40-0x9e uses
 * odd_row, a trail byte in 0x9f-0xfc uses even_row.
 */
typedef struct {
	unsigned char	odd_row;
	unsigned char	even_row;
} sj1torow_t;

/* Rows for lead bytes 0x81-0x9f; index is (lead byte - 0x81). */
static const sj1torow_t sj1torow_x0213_p1b1[] = {
		/* 40-9e  9f-fc */
	/* 0x81 */ {     1,     2 },
	/* 0x82 */ {     3,     4 },
	/* 0x83 */ {     5,     6 },
	/* 0x84 */ {     7,     8 },
	/* 0x85 */ {     9,    10 },
	/* 0x86 */ {    11,    12 },
	/* 0x87 */ {    13,    14 },
	/* 0x88 */ {    15,    16 },
	/* 0x89 */ {    17,    18 },
	/* 0x8a */ {    19,    20 },
	/* 0x8b */ {    21,    22 },
	/* 0x8c */ {    23,    24 },
	/* 0x8d */ {    25,    26 },
	/* 0x8e */ {    27,    28 },
	/* 0x8f */ {    29,    30 },
	/* 0x90 */ {    31,    32 },
	/* 0x91 */ {    33,    34 },
	/* 0x92 */ {    35,    36 },
	/* 0x93 */ {    37,    38 },
	/* 0x94 */ {    39,    40 },
	/* 0x95 */ {    41,    42 },
	/* 0x96 */ {    43,    44 },
	/* 0x97 */ {    45,    46 },
	/* 0x98 */ {    47,    48 },
	/* 0x99 */ {    49,    50 },
	/* 0x9a */ {    51,    52 },
	/* 0x9b */ {    53,    54 },
	/* 0x9c */ {    55,    56 },
	/* 0x9d */ {    57,    58 },
	/* 0x9e */ {    59,    60 },
	/* 0x9f */ {    61,    62 },
};

/* Rows for lead bytes 0xe0-0xef; index is (lead byte - 0xe0). */
static const sj1torow_t sj1torow_x0213_p1b2[] = {
		/* 40-9e  9f-fc */
	/* 0xe0 */ {    63,    64 },
	/* 0xe1 */ {    65,    66 },
	/* 0xe2 */ {    67,    68 },
	/* 0xe3 */ {    69,    70 },
	/* 0xe4 */ {    71,    72 },
	/* 0xe5 */ {    73,    74 },
	/* 0xe6 */ {    75,    76 },
	/* 0xe7 */ {    77,    78 },
	/* 0xe8 */ {    79,    80 },
	/* 0xe9 */ {    81,    82 },
	/* 0xea */ {    83,    84 },
	/* 0xeb */ {    85,    86 },
	/* 0xec */ {    87,    88 },
	/* 0xed */ {    89,    90 },
	/* 0xee */ {    91,    92 },
	/* 0xef */ {    93,    94 },
};

/*
 * Rows for lead bytes 0xf0-0xfc; index is (lead byte - 0xf0).  Unlike
 * the two tables above, the row numbers here are not consecutive.
 */
static const sj1torow_t sj1torow_x0213_p2[] = {
		/* 40-9e  9f-fc */
	/* 0xf0 */ {     1,     8 },
	/* 0xf1 */ {     3,     4 },
	/* 0xf2 */ {     5,    12 },
	/* 0xf3 */ {    13,    14 },
	/* 0xf4 */ {    15,    78 },
	/* 0xf5 */ {    79,    80 },
	/* 0xf6 */ {    81,    82 },
	/* 0xf7 */ {    83,    84 },
	/* 0xf8 */ {    85,    86 },
	/* 0xf9 */ {    87,    88 },
	/* 0xfa */ {    89,    90 },
	/* 0xfb */ {    91,    92 },
	/* 0xfc */ {    93,    94 },
};

/*
 * Convert a two-byte Shift_JIS sequence (lead byte c1, trail byte c2)
 * into a 16-bit code.
 *
 * The high byte of the result is (row + 0xa0), where the row comes from
 * the table entry for c1.  The low byte is the cell position counted
 * from 0x21.  For lead bytes up to 0xef, bit 0x0080 is also set in the
 * low byte; for lead bytes 0xf0 and above it is left clear.
 *
 * No validation is done here: the caller must already have checked that
 * c1 is in 0x81-0x9f, 0xe0-0xef or 0xf0-0xfc and that c2 is a valid
 * trail byte.  Any other c1 would index past the end of a table.
 */
static unsigned short sjtoe16_x0213(unsigned char c1, unsigned char c2)
{
	const sj1torow_t	*p;
	unsigned short		e16;

	/* Pick the table entry for this lead byte. */
	if ((c1 >= 0x81) && (c1 <= 0x9f)) {
		p = &(sj1torow_x0213_p1b1[c1 - 0x81]);
	} else if ((c1 >= 0xe0) && (c1 <= 0xef)) {
		p = &(sj1torow_x0213_p1b2[c1 - 0xe0]);
	} else {
		p = &(sj1torow_x0213_p2[c1 - 0xf0]);
	}

	if (c2 >= 0x9f) {
		/* Upper half of the trail byte range: second row of the pair. */
		e16 = (unsigned short)((p->even_row + 0xa0) << 8);
		e16 |= (c2 - 0x9f + 0x21);
	} else {
		/* Lower half of the trail byte range: first row of the pair. */
		e16 = (unsigned short)((p->odd_row + 0xa0) << 8);
		e16 |= (c2 - 0x40 + 0x21);
		if (c2 >= 0x80) {
			/*
			 * Close the one-position gap at 0x7f.  The low byte
			 * is at least 0x61 here, so the decrement never
			 * borrows from the row byte.
			 */
			e16--;
		}
	}

	/* Lead bytes up to 0xef get the 0x0080 flag in the low byte. */
	if (c1 <= 0xef) {
		e16 |= 0x0080;
	}

	return (e16);
}
144 
/*
 * Create a conversion descriptor for this module.
 *
 * Delegates to _icv_open_unicode() with an argument of 0 and returns
 * whatever it returns.
 */
void *
_icv_open(void)
{
	return (_icv_open_unicode((size_t)0));
}
150 
/*
 * Release a conversion descriptor by handing it to _icv_close_unicode().
 */
void
_icv_close(void *cd)
{
	_icv_close_unicode(cd);
}
157 
158 size_t
_icv_iconv(void * cd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)159 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft,
160 				char **outbuf, size_t *outbytesleft)
161 {
162 	unsigned int	u32;		/* UTF-32 */
163 	unsigned short	e16;		/* 16-bit EUC */
164 	unsigned char	ic1, ic2;	/* 1st and 2nd bytes of a char */
165 	size_t		rv = (size_t)0;	/* return value of this function */
166 
167 	unsigned char	*ip;
168 	size_t		ileft;
169 	char		*op;
170 	size_t		oleft;
171 
172 	/*
173 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
174 	 * and put escape sequence if needed.
175 	 */
176 	if ((inbuf == NULL) || (*inbuf == NULL)) {
177 		_icv_reset_unicode(cd);
178 		return ((size_t)0);
179 	}
180 
181 	ip = (unsigned char *)*inbuf;
182 	ileft = *inbytesleft;
183 	op = *outbuf;
184 	oleft = *outbytesleft;
185 
186 	while (ileft != 0) {
187 		NGET(ic1, "never fail here"); /* get 1st byte */
188 
189 		if (ISASC((int)ic1)) {	/* ASCII; 1 byte */
190 			u32 = _jfp_tbl_jisx0201roman_to_ucs2[ic1];
191 			PUTU(u32, "ASCII");
192 		} else if (ISSJKANA(ic1)) { /* JIS X 0201 Kana; 1 byte */
193 			u32 = _jfp_tbl_jisx0201kana_to_ucs2[ic1 - 0xa1];
194 			PUTU(u32, "KANA");
195 		} else if (((ic1 >= 0x81) && (ic1 <= 0x9f)) ||
196 				((ic1 >= 0xe0) && (ic1 <= 0xef))) {
197 			/* JIS X 0213 plane 1 */
198 			NGET(ic2, "PLANE1-2");
199 			if (ISSJKANJI2(ic2)) {
200 				e16 = sjtoe16_x0213(ic1, ic2);
201 				u32 = (unsigned int)_jfp_tbl_jisx0208_to_ucs2[
202 					((e16 >> 8) - 0xa1) * 94
203 					+ ((e16 & 0xff) - 0xa1)];
204 				if (IFHISUR(u32)) {
205 					u32 = _jfp_lookup_x0213_nonbmp(
206 						e16, u32);
207 					PUTU(u32, "PLANE1->NONBMP");
208 				} else if (u32 == 0xffff) {
209 					/* need to compose */
210 					unsigned int	u32_2;
211 					u32 = _jfp_lookup_x0213_compose(
212 						e16, &u32_2);
213 					PUTU(u32, "PLANE1->CP1");
214 					PUTU(u32_2, "PLANE1->CP2");
215 				} else {
216 					PUTU(u32, "PLANE1->BMP");
217 				}
218 			} else { /* 2nd byte check failed */
219 				RETERROR(EILSEQ, "PLANE1-2")
220 				/* NOTREACHED */
221 			}
222 		} else if ((ic1 >= 0xf0) && (ic1 <= 0xfc)) {
223 			/* JIS X 0213 plane 2 */
224 			NGET(ic2, "PLANE2-2");
225 			if (ISSJKANJI2(ic2)) {
226 				e16 = sjtoe16_x0213(ic1, ic2);
227 				u32 = (unsigned int)_jfp_tbl_jisx0213p2_to_ucs2[
228 					((e16 >> 8) - 0xa1) * 94
229 					+ ((e16 & 0xff) - 0x21)];
230 				if (IFHISUR(u32)) {
231 					u32 = _jfp_lookup_x0213_nonbmp(
232 					e16, u32);
233 					PUTU(u32, "PLANE2->NONBMP");
234 				} else {
235 					PUTU(u32, "PLANE2->BMP");
236 				}
237 			} else {
238 				RETERROR(EILSEQ, "PLANE2-2")
239 				/* NOTREACHED */
240 			}
241 		} else { /* 1st byte check failed */
242 			RETERROR(EILSEQ, "EILSEQ at 1st")
243 		}
244 
245 		/*
246 		 * One character successfully converted so update
247 		 * values outside of this function's stack.
248 		 */
249 		*inbuf = (char *)ip;
250 		*inbytesleft = ileft;
251 		*outbuf = op;
252 		*outbytesleft = oleft;
253 	}
254 
255 ret:
256 	DEBUGPRINTERROR
257 
258 	/*
259 	 * Return value for successful return is not defined by XPG
260 	 * so return same as *inbytesleft as existing codes do.
261 	 */
262 	return ((rv == (size_t)-1) ? rv : *inbytesleft);
263 }
264