xref: /illumos-gate/usr/src/lib/iconv_modules/ja/common/PCK_TO_Unicode.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * COPYRIGHT AND PERMISSION NOTICE
23  *
24  * Copyright (c) 1991-2005 Unicode, Inc. All rights reserved. Distributed
25  * under the Terms of Use in http://www.unicode.org/copyright.html.
26  *
27  * This file has been modified by Sun Microsystems, Inc.
28  */
29 /*
30  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
31  * Use is subject to license terms.
32  */
33 
34 
35 #include <stdlib.h>
36 #include <errno.h>
37 #include <euc.h>
38 #include "japanese.h"
39 #include "jfp_iconv_unicode.h"
40 
41 #ifdef JAVA_CONV_COMPAT
42 #define	JFP_J2U_ICONV_JAVA
43 #elif	JFP_ICONV_MS932
44 #define	JFP_J2U_ICONV_MS932
45 #else
46 #define	JFP_J2U_ICONV
47 #endif
48 #include "jfp_jis_to_ucs2.h"
49 
/*
 * iconv module entry point: open a PCK to Unicode conversion.
 *
 * All of the work is delegated to _icv_open_unicode(), which is called
 * with an argument of (size_t)0.  Whatever that routine returns is
 * handed back to the caller unchanged as the conversion descriptor.
 */
void *
_icv_open(void)
{
	void	*cd;

	cd = _icv_open_unicode((size_t)0);
	return (cd);
}
55 
/*
 * iconv module entry point: close a conversion.
 *
 * The descriptor cd is passed straight through to _icv_close_unicode();
 * nothing is returned to the caller.
 */
void
_icv_close(void *cd)
{
	_icv_close_unicode(cd);
}
62 
63 size_t
_icv_iconv(void * cd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)64 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft,
65 				char **outbuf, size_t *outbytesleft)
66 {
67 	unsigned int	uni;		/* UTF-32 */
68 	unsigned int	index;		/* index for table lookup */
69 	unsigned char	ic1, ic2;	/* 1st and 2nd bytes of a char */
70 	size_t		rv = (size_t)0;	/* return value of this function */
71 
72 	unsigned char	*ip;
73 	size_t		ileft;
74 	char		*op;
75 	size_t		oleft;
76 
77 	/*
78 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
79 	 * and put escape sequence if needed.
80 	 */
81 	if ((inbuf == NULL) || (*inbuf == NULL)) {
82 		_icv_reset_unicode(cd);
83 		return ((size_t)0);
84 	}
85 
86 	ip = (unsigned char *)*inbuf;
87 	ileft = *inbytesleft;
88 	op = *outbuf;
89 	oleft = *outbytesleft;
90 
91 	while (ileft != 0) {
92 		NGET(ic1, "never fail here"); /* get 1st byte */
93 
94 		if (ISASC((int)ic1)) {	/* ASCII; 1 byte */
95 			uni = _jfp_tbl_jisx0201roman_to_ucs2[ic1];
96 			PUTU(uni, "ASCII");
97 		} else if (ISSJKANA(ic1)) { /* JIS X 0201 Kana; 1 byte */
98 			uni = _jfp_tbl_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
99 			PUTU(uni, "KANA");
100 		} else if (ISSJKANJI1(ic1)) { /* JIS X 0208 or UDC; 2 bytes */
101 			NGET(ic2, "CS1-2 not available");
102 			if (ISSJKANJI2(ic2)) {
103 				ic1 = sjtojis1[(ic1 - 0x80)];
104 				if (ic2 >= 0x9f) {
105 					ic1++;
106 				}
107 				index = ((ic1 - 0x21) * 94)
108 					+ (sjtojis2[ic2] - 0x21);
109 				uni = _jfp_tbl_jisx0208_to_ucs2[index];
110 				PUTU(uni, "KANJI");
111 			} else { /* 2nd byte check failed */
112 				RETERROR(EILSEQ, "EILSEQ at CS1-2")
113 				/* NOTREACHED */
114 			}
115 		} else if (ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
116 			NGET(ic2, "SUP-2 not available");
117 			if (ISSJKANJI2(ic2)) {
118 				ic1 = sjtojis1[(ic1 - 0x80)];
119 				if (ic2 >= 0x9f) {
120 					ic1++;
121 				}
122 				index = ((ic1 - 0x21) * 94)
123 						+ (sjtojis2[ic2] - 0x21);
124 				uni = _jfp_tbl_jisx0212_to_ucs2[index];
125 				PUTU(uni, "SUPKANJI");
126 			} else { /* 2nd byte check failed */
127 				RETERROR(EILSEQ, "EILSEQ at CS1-2")
128 			}
129 		} else if (ISSJIBM(ic1) || /* Extended IBM char. area */
130 			ISSJNECIBM(ic1)) { /* NEC/IBM char. area */
131 			/*
132 			 * We need a special treatment for each codes.
133 			 * By adding some offset number for them, we
134 			 * can process them as the same way of that of
135 			 * extended IBM chars.
136 			 */
137 			NGET(ic2, "IBM-2 not available");
138 			if (ISSJKANJI2(ic2)) {
139 				unsigned short dest, upper, lower;
140 				dest = (ic1 << 8) + ic2;
141 				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
142 					REMAP_NEC(dest);
143 					if (dest == 0xffff) {
144 						RETERROR(EILSEQ, "invalid NEC")
145 					}
146 				}
147 				/*
148 				 * XXX: 0xfa54 and 0xfa5b must be mapped
149 				 *	to JIS0208 area. Therefore we
150 				 *	have to do special treatment.
151 				 */
152 				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
153 					if (dest == 0xfa54) {
154 			/* map to JIS X 0208 row 2 cell 44 "NOT SIGN" */
155 				index = (2 - 1) * 94 + (44 - 1);
156 					} else {
157 			/* map to JIS X 0208 row 2 cell 72 "BECAUSE" */
158 				index = (2 - 1) * 94 + (72 - 1);
159 					}
160 					uni = _jfp_tbl_jisx0208_to_ucs2[index];
161 					PUTU(uni, "IBM");
162 				} else {
163 					dest = dest - 0xfa40 -
164 						(((dest>>8) - 0xfa) * 0x40);
165 					dest = sjtoibmext[dest];
166 					if (dest == 0xffff) {
167 						RETERROR(EILSEQ, "invalid IBM")
168 					}
169 					upper = ((dest >> 8) & 0x7f) - 0x21;
170 					lower = (dest & 0x7f) - 0x21;
171 					index = (unsigned int)(upper * 94 +
172 						lower);
173 					uni = _jfp_tbl_jisx0212_to_ucs2[index];
174 					PUTU(uni, "IBM");
175 				}
176 			} else { /* 2nd byte check failed */
177 				RETERROR(EILSEQ, "EILSEQ at IBM-2")
178 			}
179 		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
180 		/*
181 		 * Based on the draft convention of OSF-JVC CDEWG,
182 		 * characters in this area will be mapped to
183 		 * "CHIKAN-MOJI." (convertible character)
184 		 * We use U+FFFD in this case.
185 		 */
186 			NGET(ic2, "GAP-2 not available");
187 			if (ISSJKANJI2(ic2)) {
188 				uni = 0xfffd;
189 				PUTU(uni, "GAP");
190 			} else { /* 2nd byte check failed */
191 				RETERROR(EILSEQ, "EILSEQ at GAP-2")
192 			}
193 		} else { /* 1st byte check failed */
194 			RETERROR(EILSEQ, "EILSEQ at 1st")
195 		}
196 
197 		/*
198 		 * One character successfully converted so update
199 		 * values outside of this function's stack.
200 		 */
201 		*inbuf = (char *)ip;
202 		*inbytesleft = ileft;
203 		*outbuf = op;
204 		*outbytesleft = oleft;
205 	}
206 
207 ret:
208 	DEBUGPRINTERROR
209 
210 	/*
211 	 * Return value for successful return is not defined by XPG
212 	 * so return same as *inbytesleft as existing codes do.
213 	 */
214 	return ((rv == (size_t)-1) ? rv : *inbytesleft);
215 }
216