xref: /illumos-gate/usr/src/lib/iconv_modules/utf-8/common/utf_ebcdic_to_utf8.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999 by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <sys/types.h>
30 #include <sys/isa_defs.h>
31 #include "utf_ebcdic_to_utf8.h"
32 
33 void *
_icv_open()34 _icv_open()
35 {
36 	return((void *)MAGIC_NUMBER);
37 }
38 
39 
40 void
_icv_close(int * cd)41 _icv_close(int *cd)
42 {
43 	if (! cd || cd != (int *)MAGIC_NUMBER)
44 		errno = EBADF;
45 }
46 
47 
/*
 * Leave the enclosing conversion loop with E2BIG when fewer than `need'
 * octets remain between `ob' and `obtail'.  The input cursor `ib' is
 * rewound to `ib_org' so the character being converted is not consumed.
 *
 * This relies on the caller's locals (ob, obtail, ib, ib_org, ret_val)
 * and is intentionally a bare `if' rather than a do/while(0) wrapper:
 * the `break' has to terminate the caller's loop, so the macro must be
 * expanded directly inside that loop body.
 */
#define	OUTBUF_SIZE_CHECK(need) \
	if ((need) > (obtail - ob)) { \
		ret_val = (size_t)-1; \
		errno = E2BIG; \
		ib = ib_org; \
		break; \
	}

/* Look up one UTF-EBCDIC octet in the utf_ebcdic_to_i8[] table. */
#define UTFEBICDIC_I8(octet) utf_ebcdic_to_i8[(octet)]
57 
58 size_t
_icv_iconv(int * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)59 _icv_iconv(int *cd, char **inbuf, size_t *inbufleft, char **outbuf,
60                 size_t *outbufleft)
61 {
62 	size_t ret_val = 0;
63 	uchar_t *ib;
64 	uchar_t *ob;
65 	uchar_t *ibtail;
66 	uchar_t *obtail;
67 
68 	if (cd != (int *)MAGIC_NUMBER) {
69 		errno = EBADF;
70 		return((size_t)-1);
71 	}
72 
73 	if (!inbuf || !(*inbuf))
74 		return((size_t)0);
75 
76 	ib = (uchar_t *)*inbuf;
77 	ob = (uchar_t *)*outbuf;
78 	ibtail = ib + *inbufleft;
79 	obtail = ob + *outbufleft;
80 
81 	while (ib < ibtail) {
82 		uchar_t *ib_org;
83 		uint_t u4;
84 		signed char sz; /* must be signed for loop condition */
85 
86 		sz = number_of_bytes_in_utf_ebcidc[*ib];
87 		if ((sz > UTF_EBCDIC_LEAD_OCTET_MAX) ||
88 			(sz < UTF_EBCDIC_LEAD_OCTET_MIN)) {
89 			errno = EILSEQ;
90 			ret_val = (size_t)-1;
91 			break;
92 		}
93 		/* sz == 0 means control character. and it need 1 byte */
94 		if ((ibtail - ib) < ((sz == 0)? 1: sz)) {
95 			errno = EINVAL;
96 			ret_val = (size_t)-1;
97 			break;
98 		}
99 
100 		ib_org = ib;
101 
102 		u4 = (UTFEBICDIC_I8(*ib++) & utf_ebcdic_masks_tbl[sz]);
103 
104 		/* correct size */
105 		if (sz == 0){
106 			sz = 1;
107 		}
108 		for (; sz > 1; sz--) {
109 			if (number_of_bytes_in_utf_ebcidc[*ib] !=
110 				UTF_EBCDIC_TRAILING_OCTET) {
111 				ib = ib_org;
112 				errno = EILSEQ;
113 				ret_val = (size_t)-1;
114 				goto illegal_char_err;
115 			}
116 			u4 = ((u4 << UTF_EBCDIC_BIT_SHIFT) |
117 				(((uint_t)(UTFEBICDIC_I8(*ib)))
118 				& UTF_EBCDIC_BIT_MASK));
119 			ib++;
120 		}
121 
122 		if (u4 <= 0x7f) {
123 			OUTBUF_SIZE_CHECK(1);
124 			*ob++ = (uchar_t)u4;
125 		} else if (u4 <= 0x7ff) {
126 			OUTBUF_SIZE_CHECK(2);
127 			*ob++ = (uchar_t)(0xc0 | ((u4 & 0x07c0) >> 6));
128 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x003f));
129 		} else if (u4 <= 0xd7ff) {
130 			OUTBUF_SIZE_CHECK(3);
131 			*ob++ = (uchar_t)(0xe0 | ((u4 & 0x0f000) >> 12));
132 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0) >> 6));
133 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x0003f));
134 		} else if (u4 <= 0x00dfff) {
135 			/* S zone */
136 			errno = EILSEQ;
137 			ret_val = (size_t)-1;
138 			break;
139 		} else if (u4 <= 0x00fffd) {
140 			OUTBUF_SIZE_CHECK(3);
141 			*ob++ = (uchar_t)(0xe0 | ((u4 & 0x0f000) >> 12));
142 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0) >> 6));
143 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x0003f));
144 		} else if (u4 <= 0x00ffff) {
145 			errno = EILSEQ;
146 			ret_val = (size_t)-1;
147 			break;
148 		} else if (u4 <= 0x1fffff) {
149 			OUTBUF_SIZE_CHECK(4);
150 			*ob++ = (uchar_t)(0xf0 | ((u4 & 0x01c0000) >> 18));
151 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x003f000) >> 12));
152 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x0000fc0) >> 6));
153 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x000003f));
154 		} else if (u4 <= 0x3ffffff) {
155 			OUTBUF_SIZE_CHECK(5);
156 			*ob++ = (uchar_t)(0xf8 | ((u4 & 0x03000000) >> 24));
157 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0000) >> 18));
158 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x0003f000) >> 12));
159 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00000fc0) >> 6));
160 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x0000003f));
161 		} else if (u4 <= 0x7fffffff) {
162 			OUTBUF_SIZE_CHECK(6);
163 			*ob++ = (uchar_t)(0xfc | ((u4 & 0x40000000) >> 30));
164 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x3f000000) >> 24));
165 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0000) >> 18));
166 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x0003f000) >> 12));
167 			*ob++ = (uchar_t)(0x80 | ((u4 & 0x00000fc0) >> 6));
168 			*ob++ = (uchar_t)(0x80 |  (u4 & 0x0000003f));
169 
170 		} else {
171 			ib = ib_org;
172 			errno = EILSEQ;
173 			ret_val = (size_t)-1;
174 			break;
175 		}
176 	}
177 
178 illegal_char_err:
179 	*inbuf = (char *)ib;
180 	*inbufleft = ibtail - ib;
181 	*outbuf = (char *)ob;
182 	*outbufleft = obtail - ob;
183 
184 	return(ret_val);
185 }
186