xref: /illumos-gate/usr/src/lib/iconv_modules/ja/common/UTF-8_TO_ISO-2022-JP.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1997-2004 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32 #include "jfp_iconv_unicode.h"
33 
34 #ifdef RFC1468_MODE
35 #define	JFP_U2E_ICONV_RFC1468
36 #else
37 #define	JFP_U2E_ICONV
38 #endif
39 #include "jfp_ucs2_to_euc16.h"
40 
/* Substitute output byte emitted for characters outside the BMP. */
#define	DEF_SINGLE	'?'
42 
/*
 * struct _icv_state: conversion state kept in each conversion descriptor.
 */
struct _icv_state {
	/* Code set currently selected in the output stream (CS_0 .. CS_3). */
	int	_st_cset;
};
49 
50 void *
_icv_open(void)51 _icv_open(void)
52 {
53 	struct _icv_state *st;
54 
55 	if ((st = (struct _icv_state *)
56 		malloc(sizeof (struct _icv_state))) == NULL)
57 		return ((void *)-1);
58 
59 	st->_st_cset = CS_0;
60 
61 	return (st);
62 }
63 
/*
 * Release a conversion descriptor obtained from _icv_open().
 *
 * A NULL descriptor is rejected by setting errno to EBADF; nothing is
 * freed in that case.
 */
void
_icv_close(void *cd)
{
	if (cd == NULL) {
		errno = EBADF;
		return;
	}

	free(cd);
}
74 
75 size_t
_icv_iconv(void * cd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)76 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft,
77 				char **outbuf, size_t *outbytesleft)
78 {
79 	unsigned char	ic;
80 	size_t		rv = (size_t)0;
81 	unsigned int	ucs4;
82 	unsigned short	euc16;
83 
84 	struct _icv_state	*st = (struct _icv_state *)cd;
85 	int			cset;
86 
87 	unsigned char	*ip;
88         size_t		ileft;
89 	char		*op;
90         size_t		oleft;
91 
92 	/*
93 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
94 	 * and put escape sequence if needed.
95 	 */
96 	if ((inbuf == NULL) || (*inbuf == NULL)) {
97 		if (st->_st_cset != CS_0) {
98 			if ((outbuf != NULL) && (*outbuf != NULL)
99 					&& (outbytesleft != NULL)) {
100 				op = (char *)*outbuf;
101 				oleft = *outbytesleft;
102 				NPUT(ESC, "RESET-SEQ-ESC");
103 				NPUT(SBTOG0_1, "RESET-SEQ-1");
104 				NPUT(F_X0201_RM, "RESET-SEQ-2");
105 				*outbuf = (char *)op;
106 				*outbytesleft = oleft;
107 			}
108 			st->_st_cset = CS_0;
109 		}
110 		return ((size_t)0);
111 	}
112 
113 	cset = st->_st_cset;
114 
115 	ip = (unsigned char *)*inbuf;
116 	ileft = *inbytesleft;
117 	op = *outbuf;
118 	oleft = *outbytesleft;
119 
120 	while (ileft != 0) {
121 		if (utf8_ucs(&ucs4, &ip, &ileft) == (size_t)-1) {
122 			/* errno has been set in utf8_ucs() */
123 			rv = (size_t)-1;
124 			goto ret;
125 		}
126 
127 		if (ucs4 > 0xffff) {
128 			/* non-BMP */
129 			if (cset != CS_0) {
130 				NPUT(ESC, "CS0-SEQ-ESC");
131 				NPUT(SBTOG0_1, "CS0-SEQ-1");
132 				NPUT(F_X0201_RM, "CS0-SEQ-2");
133 				cset = CS_0;
134 			}
135 			ic = (unsigned char)DEF_SINGLE;
136 			NPUT(ic, "DEF for non-BMP(replaced)");
137 		} else {
138 			euc16 = _jfp_ucs2_to_euc16((unsigned short)ucs4);
139 
140 			switch (euc16 & 0x8080) {
141 			case 0x0000:	/* CS0 */
142 				if (cset != CS_0) {
143 					NPUT(ESC, "CS0-SEQ-ESC");
144 					NPUT(SBTOG0_1, "CS0-SEQ-1");
145 					NPUT(F_X0201_RM, "CS0-SEQ-2");
146 					cset = CS_0;
147 				}
148 				ic = (unsigned char)euc16;
149 				NPUT(ic, "CS0-1");
150 				break;
151 			case 0x8080:	/* CS1 */
152 				if (cset != CS_1) {
153 					NPUT(ESC, "CS1-SEQ-ESC");
154 					NPUT(MBTOG0_1, "CS1-SEQ-1");
155 					NPUT(F_X0208_83_90, "CS1-SEQ-2");
156 					cset = CS_1;
157 				}
158 				ic = (unsigned char)((euc16 >> 8) & CMASK);
159 				NPUT(ic, "CS1-1");
160 				ic = (unsigned char)(euc16 & CMASK);
161 				NPUT(ic, "CS1-2");
162 				break;
163 			case 0x0080:	/* CS2 */
164 #ifdef  RFC1468_MODE	/* Substitute JIS X 0208 for JIS X 0201 Katakana */
165 				if (cset != CS_1) {
166 					NPUT(ESC, "CS2-SEQ-ESC(fullsized)");
167 					NPUT(MBTOG0_1, "CS2-SEQ-1(fullsized)");
168 					NPUT(F_X0208_83_90,
169 						"CS2-SEQ-2(fullsized)");
170 					cset = CS_1;
171 				}
172 				euc16 = halfkana2zenkakuj[euc16 - 0xa1];
173 				ic = (unsigned char)((euc16 >> 8) & CMASK);
174 				NPUT(ic, "CS2-1(fullsized)");
175 				ic = (unsigned char)(euc16 & CMASK);
176 				NPUT(ic, "CS2-2(fullsized)");
177 #else   /* ISO-2022-JP.UIOSF */
178 				if (cset != CS_2) {
179 					NPUT(ESC, "CS2-SEQ-ESC");
180 					NPUT(SBTOG0_1, "CS2-SEQ-1");
181 					NPUT(F_X0201_KN, "CS2-SEQ-2");
182 					cset = CS_2;
183 				}
184 				ic = (unsigned char)euc16;
185 				NPUT(ic & CMASK, "CS2-1");
186 #endif  /* RFC1468_MODE */
187 				break;
188 			case 0x8000:	/* CS3 */
189 				if (cset != CS_3) {
190 					NPUT(ESC, "CS3-SEQ-ESC");
191 					NPUT(MBTOG0_1, "CS3-SEQ-1");
192 					NPUT(MBTOG0_2, "CS3-SEQ-2");
193 					NPUT(F_X0212_90, "CS3-SEQ-3");
194 					cset = CS_3;
195 				}
196 				ic = (unsigned char)((euc16 >> 8) & CMASK);
197 				NPUT(ic, "CS3-1");
198 				ic = (unsigned char)(euc16 & CMASK);
199 				NPUT(ic, "CS3-2");
200 				break;
201 			}
202 		}
203 
204 		/*
205 		 * One character successfully converted so update
206 		 * values outside of this function's stack.
207 		 */
208 		*inbuf = (char *)ip;
209 		*inbytesleft = ileft;
210 		*outbuf = op;
211 		*outbytesleft = oleft;
212 
213 		st->_st_cset = cset;
214 	}
215 
216 ret:
217 
218 #if	defined(DEBUG)
219 	if (rv == (size_t)-1) {
220 		fprintf(stderr, "DEBUG: errno=%d: %s\n", errno, debugmsg);
221 	}
222 #endif	/* DEBUG */
223 
224 	/*
225 	 * Return value for successful return is not defined by XPG
226 	 * so return same as *inbytesleft as existing codes do.
227 	 */
228 	return ((rv == (size_t)-1) ? rv : *inbytesleft);
229 }
230