xref: /illumos-gate/usr/src/lib/iconv_modules/ko/common/unihan_to_UCS_main.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999 by Sun Microsystems, Inc.
23  */
24 
25 
26 #include <errno.h>
27 #include <widec.h>
28 #include <stdlib.h>
29 #include <sys/isa_defs.h>
30 #include <sys/types.h>
31 #include "common_def.h"
32 #include "common_defs.h"
33 #include "common_han.h"
34 #include "uhang_utf_api.h"
35 #include "euc_utf_api.h"
36 
37 typedef struct {
38   int         _magic;
39   boolean     _need_byte_swap;
40 } _icv_state_t;
41 
42 
43 extern hcode_type _unified_hangul_to_UCS2LE (hcode_type);
44 
45 void *
_icv_open()46 _icv_open()
47 {
48   _icv_state_t *h = (_icv_state_t *) malloc (sizeof (_icv_state_t));
49   if (!h){
50     errno = ENOMEM;
51     return((void *)-1);
52   }
53 
54   h->_magic = MAGIC_NUMBER;
55 #if defined(UCS_2BE)
56   h->_need_byte_swap =false;
57 #elif defined(UCS_2LE)
58   h->_need_byte_swap = true;
59 #endif
60 
61   return (void *)h;
62 }
63 
64 
65 void
_icv_close(_icv_state_t * cd)66 _icv_close (_icv_state_t *cd)
67 {
68   if (!cd || ((_icv_state_t *)cd)->_magic  != MAGIC_NUMBER)
69     errno = EBADF;
70 }
71 
72 
73 
74 size_t
_icv_iconv(_icv_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)75 _icv_iconv (_icv_state_t *cd, char** inbuf, size_t* inbufleft,
76 	    char** outbuf, size_t* outbufleft)
77 {
78   size_t		ret_val = 0;
79   unsigned char*	ib;
80   unsigned char*	ob;
81   unsigned char*	ibtail;
82   unsigned char*	obtail;
83 
84   if (!cd || ((_icv_state_t *)cd)->_magic  != MAGIC_NUMBER)
85     {
86       errno = EBADF;
87       return((size_t)-1);
88     }
89 
90   if (!inbuf || !(*inbuf))
91     return((size_t)0);
92 
93   ib = (unsigned char*)*inbuf;
94   ob = (unsigned char*)*outbuf;
95   ibtail = ib + *inbufleft;
96   obtail = ob + *outbufleft;
97 
98   while (ib < ibtail)
99     {
100       if (*ib & 0x80)  /* Korean EUC doesn't have CS2 or CS3. */
101 	{
102 	  hcode_type unihan_code, ucs2_code;
103 	  int flag;
104 
105 	  flag = 0;
106 
107 	  if ((ibtail - ib) < 2)
108 	    {
109 	      errno = EINVAL;
110 	      ret_val = (size_t)-1;
111 	      break;
112 	    }
113 
114 
115 	  if(*ib<0xA1)
116 	    {
117 	      if((*(ib+1)>0x40 && *(ib+1)<0x5B) || (*(ib+1)>0x60 && *(ib+1)<0x7B) || (*(ib+1)>0x80 && *(ib+1)<0xFF))
118 		flag = 0;
119 	      else
120 		flag = 1;
121 
122 	    }
123 	  else
124 	    {
125 	      if(*ib<0xC7)
126 		{
127 		  if((*(ib+1)>0x40 && *(ib+1)<0x5B) || (*(ib+1)>0x60 && *(ib+1)<0x7B) || (*(ib+1)>0x80 && *(ib+1)<0xFF))
128 		    flag = 0;
129 		  else
130 		    flag = 1;
131 		}
132 	      else
133 		{
134 		  if(*(ib+1)>0xA0 && *(ib+1)<0xFF)
135 		    flag = 0;
136 		  else
137 		    flag = 1;
138 		}
139 
140 	    }
141 
142 	  if(flag)
143 	    {
144 	      errno = EILSEQ;
145 	      ret_val = (size_t)-1;
146 	      break;
147 	    }
148 
149 	  unihan_code.code = 0;
150 	  unihan_code.byte.byte3 = *ib;
151 	  unihan_code.byte.byte4 = *(ib + 1);
152 
153 	  ucs2_code = _unified_hangul_to_UCS2LE (unihan_code);
154 
155 	  if (ucs2_code.code != 0)
156 	    {
157 	      if ((obtail - ob) < 3)
158 		{
159 		  errno = E2BIG;
160 		  ret_val = (size_t)-1;
161 		  break;
162 		}
163 #if !defined(UCS_2LE) && !defined(UCS_2BE)
164 	      if (!cd->_bom_written){
165 		*ob++ = (uchar_t)0xff;
166 		*ob++ = (uchar_t)0xfe;
167 
168 		cd->_bom_written = true;
169 	      }
170 #endif    /* !defined(UCS_2LE) && !defined(UCS_2BE) */
171 	      if (cd->_need_byte_swap)
172 		{
173 		  *ob++ = ucs2_code.byte.byte4;
174 		  *ob++ = ucs2_code.byte.byte3;
175 		}
176 	      else
177 		{
178 		  *ob++ = ucs2_code.byte.byte3;
179 		  *ob++ = ucs2_code.byte.byte4;
180 		}
181 	    }
182 	  else  /* FAILED - this means input char doesn't belong to
183 		 *	  input codeset. */
184 	    {
185 	      errno = EILSEQ;
186 	      ret_val = (size_t)-1;
187 	      break;
188 	    }
189 	  ib += 2;
190 
191 	}
192       else  /* CS0 */
193 	{
194 #if !defined(UCS_2LE) && !defined(UCS_2BE)
195 	  if (!cd->_bom_written)
196 	    {
197 	    if ((obtail - ob) < 3)
198 	      {
199 		errno = E2BIG;
200 		ret_val = (size_t) -1;
201 		break;
202 	      }
203 	    *ob++ = (uchar_t)0xff;
204 	    *ob++ = (uchar_t)0xfe;
205 	    cd->_bom_written = true;
206 	    }
207 	  else
208 #endif    /* !defined(UCS_2LE) && !defined(UCS_2BE) */
209 	    {
210 	      if ((obtail - ob) < 1)
211 	      {
212 		errno = E2BIG;
213 		ret_val = (size_t) -1;
214 		break;
215 	      }
216 	    }
217 
218 	  if (cd->_need_byte_swap)
219 	    {
220 	      *ob++ = *ib++;
221 	      *ob++ = 0x00;
222 	    }
223 	  else
224 	    {
225 	      *ob++ = 0x00;
226 	      *ob++ = *ib++;
227 	    }
228 	}
229     }
230 
231   *inbuf = (char*)ib;
232   *inbufleft = ibtail - ib;
233   *outbuf = (char*)ob;
234   *outbufleft = obtail - ob;
235 
236   return(ret_val);
237 }
238