/* xref: /titanic_50/usr/src/lib/iconv_modules/ja/common/ISO-2022-JP-2004_TO_Unicode.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe) */
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31 #include "jfp_iconv_unicode.h"
32 
33 #define	JFP_J2U_ICONV_X0213
34 #include "jfp_jis_to_ucs2.h"
35 
/*
 * Per-descriptor extension state for this converter.  _icv_open() and
 * _icv_iconv() store one of the CS_* designation codes here (CS_0, CS_1
 * or CS_3) to remember which character set the most recently seen
 * escape sequence selected.
 */
struct _icv_state {
	int	_st_cset;	/* currently designated character set (CS_*) */
};
39 
40 void *
_icv_open(void)41 _icv_open(void)
42 {
43 	void			*cd;
44 	struct _icv_state	*st;
45 
46 	cd = _icv_open_unicode(sizeof (struct _icv_state));
47 
48 	if (cd != NULL) {
49 		st = (struct _icv_state *)(_icv_get_ext(cd));
50 		st->_st_cset = CS_0;
51 	}
52 
53 	return (cd);
54 }
55 
/*
 * Release a conversion descriptor previously returned by _icv_open().
 * All of the work is delegated to _icv_close_unicode().
 */
void
_icv_close(void *cd)
{
	_icv_close_unicode(cd);
}
62 
63 size_t
_icv_iconv(void * cd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)64 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft,
65 				char **outbuf, size_t *outbytesleft)
66 {
67 	unsigned int	u32;		/* UTF-32 */
68 	unsigned short	e16;		/* 16-bit EUC */
69 	unsigned char	ic1, ic2;	/* bytes in a char or an esc seq */
70 	unsigned char	ic3, ic4;	/* bytes in an esc seq */
71 	size_t		rv = (size_t)0;	/* return value of this function */
72 	struct _icv_state	*st;
73 
74 	unsigned char	*ip;
75         size_t		ileft;
76 	char		*op;
77         size_t		oleft;
78 
79 	st = (struct _icv_state *)(_icv_get_ext(cd));
80 
81 	/*
82 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
83 	 * and put escape sequence if needed.
84 	 */
85 	if ((inbuf == NULL) || (*inbuf == NULL)) {
86 		st->_st_cset = CS_0;
87 		_icv_reset_unicode(cd);
88 		return ((size_t)0);
89 	}
90 
91 	ip = (unsigned char *)*inbuf;
92 	ileft = *inbytesleft;
93 	op = *outbuf;
94 	oleft = *outbytesleft;
95 
96 	while (ileft != 0) {
97 		NGET(ic1, "never fail here"); /* get 1st byte */
98 
99 		if (ic1 == ESC) { /* Escape */
100 			NGET(ic2, "ESC-2");
101 			switch (ic2) {
102 			case 0x24: /* $ */
103 				NGET(ic3, "ESC$-3");
104 				switch (ic3) {
105 				case 0x28: /* $( */
106 					NGET(ic4, "ESC$(-4");
107 					switch (ic4) {
108 					case 0x4f: /* 24-28-4F ESC$(O */
109 						st->_st_cset = CS_1;
110 						break;
111 					case 0x50: /* 24-28-50 ESC$(P */
112 						st->_st_cset = CS_3;
113 						break;
114 					case 0x51: /* 24-28-51 ESC$(Q */
115 						st->_st_cset = CS_1;
116 						break;
117 					default:
118 						RETERROR(EILSEQ,
119 							"Unknown ESC$(?");
120 					}
121 					break;
122 				case 0x42: /* 24-42 ESC$B */
123 					st->_st_cset = CS_1;
124 					break;
125 				default:
126 					RETERROR(EILSEQ, "Unknown ESC$?");
127 				}
128 				break;
129 			case 0x28: /* ( */
130 				NGET(ic3, "ESC(-3");
131 				switch (ic3) {
132 				case 0x42: /* 28-42 ESC(B */
133 					st->_st_cset = CS_0;
134 					break;
135 				default:
136 					RETERROR(EILSEQ, "Unknown ESC(?");
137 				}
138 				break;
139 			default:
140 				RETERROR(EILSEQ, "Unknown ESC?");
141 			}
142 		} else if (st->_st_cset == CS_0) { /* IRV */
143 			if ((ic1 == 0x0e) || (ic1 == 0x0f) || (ic1 > 0x7f)) {
144 				RETERROR(EILSEQ, "IRV-1")
145 			}
146 			u32 = (unsigned int)_jfp_tbl_jisx0201roman_to_ucs2[ic1];
147 			PUTU(u32, "IRV");
148 		} else if (st->_st_cset == CS_1) { /* Plane 1 */
149 			if ((ic1 < 0x21) || (ic1 > 0x7e)) {
150 				RETERROR(EILSEQ, "PLANE1-1")
151 			}
152 			NGET(ic2, "PLANE1-2");
153 			if ((ic2 < 0x21) || (ic2 > 0x7e)) {
154 				RETERROR(EILSEQ, "PLANE1-2")
155 			}
156 			e16 = ((ic1 << 8) | ic2) | 0x8080;
157 			u32 = (unsigned int)_jfp_tbl_jisx0208_to_ucs2[
158 				(ic1 - 0x21) * 94 + (ic2 - 0x21)];
159 			if (IFHISUR(u32)) {
160 				u32 = _jfp_lookup_x0213_nonbmp(e16, u32);
161 				PUTU(u32, "PLANE1->NONBMP");
162 			} else if (u32 == 0xffff) {
163 				/* need to compose */
164 				unsigned int	u32_2;
165 				u32 = _jfp_lookup_x0213_compose(e16, &u32_2);
166 				PUTU(u32, "PLANE1->CP1");
167 				PUTU(u32_2, "PLANE1->CP2");
168 			} else {
169 				PUTU(u32, "PLANE1->BMP");
170 			}
171 		} else if (st->_st_cset == CS_3) { /* Plane 2 */
172 			if ((ic1 < 0x21) || (ic1 > 0x7e)) {
173 				RETERROR(EILSEQ, "PLANE2-1")
174 			}
175 			NGET(ic2, "PLANE2-2");
176 			if ((ic2 < 0x21) || (ic2 > 0x7e)) {
177 				RETERROR(EILSEQ, "PLANE2-2")
178 			}
179 			e16 = ((ic1 << 8) | ic2) | 0x8000;
180 			u32 = (unsigned int)_jfp_tbl_jisx0213p2_to_ucs2[
181 				(ic1 - 0x21) * 94 + (ic2 - 0x21)];
182 			if (IFHISUR(u32)) {
183 				u32 = _jfp_lookup_x0213_nonbmp(e16, u32);
184 				PUTU(u32, "PLANE2->NONBMP");
185 			} else {
186 				PUTU(u32, "PLANE2->BMP");
187 			}
188 		}
189 
190 		/*
191 		 * One character successfully converted so update
192 		 * values outside of this function's stack.
193 		 */
194 		*inbuf = (char *)ip;
195 		*inbytesleft = ileft;
196 		*outbuf = op;
197 		*outbytesleft = oleft;
198 	}
199 
200 ret:
201 	DEBUGPRINTERROR
202 
203 	/*
204 	 * Return value for successful return is not defined by XPG
205 	 * so return same as *inbytesleft as existing codes do.
206 	 */
207 	return ((rv == (size_t)-1) ? rv : *inbytesleft);
208 }
209