1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright(c) 2001 Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <strings.h>
30 #include "iscii.h"
31
/* Bit and byte masks applied to the raw ISCII input bytes. */
#define	MSB		0x80	/* high bit; set on non-ASCII ISCII bytes */
#define	ONEBYTE		0xff	/* keeps only the low eight bits */

/* UTF-8 encoding of U+FFFD, written for input that cannot be converted. */
#define	REPLACE_CHAR1	0xEF
#define	REPLACE_CHAR2	0xBF
#define	REPLACE_CHAR3	0xBD
38
/*
 * UTF8_SETnB(b, v): store the n-byte UTF-8 encoding of code point v
 * into b[0..n-1].  The caller must have checked that b has room for n
 * bytes and that v lies in the range an n-byte sequence can represent
 * (1B: up to 0x7f, 2B: up to 0x7ff, 3B: up to 0xffff).
 *
 * Both arguments are evaluated more than once, so they must not have
 * side effects.  The 2- and 3-byte forms are wrapped in do/while(0) so
 * that they behave as a single statement (e.g. under an unbraced if),
 * and every use of an argument is parenthesized so that expression
 * arguments such as (a | b) encode correctly.
 */
#define	UTF8_SET1B(b, v) \
	((b)[0] = ((v) & 0x7f))

#define	UTF8_SET2B(b, v) \
	do { \
		(b)[0] = (0xc0 | (((v) >> 6) & 0x1f)); \
		(b)[1] = (0x80 | ((v) & 0x3f)); \
	} while (0)

#define	UTF8_SET3B(b, v) \
	do { \
		(b)[0] = (0xe0 | (((v) >> 12) & 0xf)); \
		(b)[1] = (0x80 | (((v) >> 6) & 0x3f)); \
		(b)[2] = (0x80 | ((v) & 0x3f)); \
	} while (0)
50
/*
 * Per-descriptor conversion state, allocated by _icv_open() and passed
 * back on every _icv_iconv() / _icv_close() call.
 */
typedef struct _icv_state {
	/*
	 * keepc[0] holds the current script attribute byte;
	 * keepc[1] and keepc[2] hold the ISCII bytes that have been read
	 * but not yet converted (they are looked up together).
	 */
	char	keepc[3];
	/* state (enum _CSTATE value) of the previously processed byte */
	short	pState;
	/* errno saved from the last conversion attempt on this descriptor */
	int	_errno;
} _iconv_st;
56
/* Classification of an input byte by the _icv_iconv() state machine. */
enum _CSTATE {
	S_BASIC,	/* ordinary byte (neither ATR nor EXT) */
	S_ATR,		/* attribute introducer */
	S_EXT,		/* extension introducer */
	S_NONE
};

/* True when the script has a nukta table / an EXT table, respectively. */
#define	have_nukta(t)	(nukta_type[t] != NULL)
#define	have_EXT(t)	(EXT_type[t] != NULL)

/* ISCII byte that corresponds to index 0 of the nukta and EXT tables. */
#define	FIRST_CHAR	0xA0
62
63 static int copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen);
64
65 static ucs_t
get_nukta(uchar iscii,int type)66 get_nukta(uchar iscii, int type)
67 {
68 int indx = iscii - FIRST_CHAR;
69 int *iscii_nukta = nukta_type[type];
70
71 return ((indx >= 0) ? iscii_nukta[indx] : 0 );
72 }
73
74 static ucs_t
get_EXT(uchar iscii,int type)75 get_EXT(uchar iscii, int type)
76 {
77 int indx = iscii - FIRST_CHAR;
78 int *iscii_EXT = EXT_type[type];
79
80 return ((indx >= 0) ? iscii_EXT[indx] : 0 );
81 }
82
83 static ucs_t
traverse_table(Entry * entry,int num,uchar iscii)84 traverse_table(Entry *entry, int num, uchar iscii)
85 {
86 int i=0;
87 ucs_t retucs=0;
88
89 for ( ; i < num; ++i ) {
90 Entry en = entry[i];
91
92 if ( iscii < en.iscii ) break;
93 if ( iscii >= en.iscii && iscii < en.iscii + en.count ) {
94 retucs = en.ucs + ( iscii - en.iscii );
95 break;
96 }
97 }
98
99 return retucs;
100 }
101
102 /*
103 * the copy_to_outbuf has to be called before the st->keepc needs to changed.
104 * if E2BIG error, keep st->keepc. Will flush it at the beginning of next
105 * _icv_iconv() invocation
106 */
107 int
iscii_to_utf8(_iconv_st * st,char * buf,size_t buflen)108 iscii_to_utf8(_iconv_st *st, char *buf, size_t buflen)
109 {
110 #define DEV_ATR 0x42
111 ucs_t uniid;
112 int nBytes=0;
113 ISCII isc_type = isc_TYPE[st->keepc[0] - DEV_ATR];
114 Entries en = iscii_table[isc_type];
115 /* unsigned int keepc0 = (unsigned int) (st->keepc[0] & ONEBYTE); */
116 unsigned int keepc1 = (unsigned int) (st->keepc[1] & ONEBYTE);
117 unsigned int keepc2 = (unsigned int) (st->keepc[2] & ONEBYTE);
118
119 if (keepc1 == 0xFF) { /* FFFD */
120 if ( buflen < 3 ) {
121 errno = E2BIG;
122 return 0;
123 }
124
125 *buf = (char)REPLACE_CHAR1;
126 *(buf+1) = (char)REPLACE_CHAR2;
127 *(buf+2) = (char)REPLACE_CHAR3;
128 return (3);
129 }
130
131 if (keepc2 == 0) { /* Flush Single Character */
132
133 if (keepc1 & MSB) { /* ISCII - Non-Ascii Codepoints */
134 uniid = traverse_table(en.entry, en.items, keepc1);
135 } else /* ASCII */
136 uniid = keepc1;
137
138 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
139 st->keepc[1] = 0;
140
141 } else {
142 /* keepc[1] and keepc[2] != 0 */
143 if (keepc1 & MSB) {
144
145 switch (keepc1)
146 {
147 case ISC_ext:
148
149 if ( have_EXT(isc_type) && is_valid_ext_code(keepc2) )
150 { /* EXT only supported in Devanagari script */
151
152 uniid = get_EXT(keepc2, isc_type);
153 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
154 }
155 else
156 errno = EILSEQ;
157
158 st->keepc[1] = st->keepc[2] = 0;
159 break;
160 case ISC_halant:
161 /* test whether there has enough space to hold the converted bytes */
162 if ((keepc2 == ISC_halant || keepc2 == ISC_nukta) && buflen < 6 )
163 goto E2big;
164
165 uniid = traverse_table(en.entry, en.items, keepc1);
166 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
167 st->keepc[1] = st->keepc[2];
168
169 if ( keepc2 == ISC_halant || keepc2 == ISC_nukta )
170 {
171 int nbytes_2 = 0;
172 if (keepc2 == ISC_halant) uniid = UNI_ZWNJ; /* explicit Halant */
173 if (keepc2 == ISC_nukta) uniid = UNI_ZWJ; /* soft Halant */
174
175 if ((nbytes_2 = copy_to_outbuf(uniid, buf+nBytes, buflen)) == 0) goto E2big;
176 st->keepc[1] = st->keepc[2] = 0;
177
178 nBytes += nbytes_2;
179 }
180
181 break;
182 case ISC_danda:
183 if ( isc_type == DEV && keepc2 == ISC_danda )
184 { /* only in Devanagari script, it works */
185 uniid = UNI_DOUBLE_DANDA;
186 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
187 st->keepc[1] = st->keepc[2] = 0;
188
189 break;
190 }
191
192 /* fall into default case, convert the DANDA if it isn't DOUBLE_DANDA */
193 /* FALLTHRU */
194 default:
195
196 uniid = traverse_table(en.entry, en.items, keepc1);
197
198 if ( have_nukta(isc_type) && keepc2 == ISC_nukta) {
199 /* then try to test whether it is Nukta Cases */
200 int ucs;
201
202 if (( ucs = get_nukta(keepc1, isc_type)) != 0 ) {
203
204 uniid = ucs;
205
206 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
207 st->keepc[1] = st->keepc[2] = 0;
208 } else {
209 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
210 st->keepc[1] = st->keepc[2];
211 }
212 } else {
213 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
214 st->keepc[1] = st->keepc[2];
215 }
216 break;
217 } /* end of switch */
218 } else { /* ASCII */
219 uniid = keepc1;
220 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
221 st->keepc[1] = st->keepc[2];
222 }
223 st->keepc[2] = 0;
224 }
225
226 E2big:
227 return nBytes;
228 }
229
230 static int
copy_to_outbuf(ucs_t uniid,char * buf,size_t buflen)231 copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen)
232 {
233 if (uniid > 0) {
234 if (uniid <= 0x7f) {
235 if (buflen < 1) {
236 errno = E2BIG;
237 return(0);
238 }
239 UTF8_SET1B(buf, uniid);
240 return (1);
241 }
242
243 if (uniid >= 0x80 && uniid <= 0x7ff) {
244 if (buflen < 2) {
245 errno = E2BIG;
246 return(0);
247 }
248 UTF8_SET2B(buf, uniid);
249 return (2);
250 }
251
252 if (uniid >= 0x800 && uniid <= 0xffff) {
253 if (buflen < 3) {
254 errno = E2BIG;
255 return(0);
256 }
257 UTF8_SET3B(buf, uniid);
258 return (3);
259 }
260 } else { /* Replacement Character */
261 if ( buflen < 3 ) {
262 errno = E2BIG;
263 return 0;
264 }
265
266 *buf = (char)REPLACE_CHAR1;
267 *(buf+1) = (char)REPLACE_CHAR2;
268 *(buf+2) = (char)REPLACE_CHAR3;
269 return (3);
270 }
271
272 /* This code shouldn't be reached */
273 return (0);
274 }
275
276 /*
277 * Open; called from iconv_open()
278 */
279 void *
_icv_open()280 _icv_open()
281 {
282 _iconv_st *st;
283
284 if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) {
285 errno = ENOMEM;
286 return ((void*)-1);
287 }
288
289 bzero(st, sizeof(_iconv_st));
290 st->keepc[0] = DEV_ATR;
291 st->pState = S_BASIC;
292
293 return ((void*)st);
294 }
295
296 /*
297 * Close; called from iconv_close()
298 */
299 void
_icv_close(_iconv_st * st)300 _icv_close(_iconv_st *st)
301 {
302 if (!st)
303 errno = EBADF;
304 else
305 free(st);
306 }
307
308 /*
309 * Conversion routine; called from iconv()
310 */
311 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)312 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
313 char **outbuf, size_t *outbytesleft)
314 {
315 int n;
316 short curState;
317
318 if (st == NULL) {
319 errno = EBADF;
320 return ((size_t) -1);
321 }
322
323 if (inbuf == NULL || *inbuf == NULL) { /* Reset request */
324 st->keepc[0] = DEV_ATR;
325 st->pState = S_BASIC;
326 st->_errno = 0;
327 return ((size_t)0);
328 }
329
330 /* flush if possible */
331 if ( st->_errno == E2BIG ) {
332 n = iscii_to_utf8(st, *outbuf, *outbytesleft);
333 (*outbuf) += n;
334 (*outbytesleft) -= n;
335 }
336
337 st->_errno = errno = 0; /* reset internal and external errno */
338
339 /* a state machine for interpreting ISCII code */
340 while (*inbytesleft > 0 && *outbytesleft > 0) {
341 unsigned int curChar = (unsigned int)(**inbuf & ONEBYTE);
342 unsigned int prevChar = (unsigned int)(st->keepc[1] & ONEBYTE);
343
344 if (curChar == ISC_ext)
345 curState = S_EXT;
346 else if (curChar == ISC_atr)
347 curState = S_ATR;
348 else
349 curState = S_BASIC;
350
351 switch (curState) {
352 case S_BASIC:
353 if (prevChar == 0)
354 st->keepc[1] = curChar;
355 else
356 st->keepc[2] = curChar;
357
358 if (st->pState == S_ATR) {
359 /* clear the keepc[1], which is part of attribute */
360 st->keepc[1] = 0;
361 /* change the attribute for Indian Script Fonts */
362 if ((curChar >= 0x42) && (curChar <= 0x4b) && curChar != 0x46) {
363 st->keepc[0] = curChar;
364 }
365 /* other attributes such as display attributes would be ignored */
366 } else { /* Handle Cases and Flush */
367
368 if ((curChar > 0 && curChar <= 0x7f) || prevChar != 0) {
369 n=iscii_to_utf8(st, *outbuf, *outbytesleft);
370 if (n > 0) {
371 (*outbuf) += n;
372 (*outbytesleft) -= n;
373 } else /* don't return immediately, need advance the *inbuf */
374 st->_errno = errno;
375 }
376 }
377 break;
378 case S_ATR:
379 case S_EXT: /* Do nothing */
380 if (st->pState == S_BASIC) { /* Flush */
381 if ( st->keepc[1] == 0 )
382 {
383 if (curState == S_EXT) st->keepc[1] = ISC_ext;
384 break;
385 }
386 n = iscii_to_utf8(st, *outbuf, *outbytesleft);
387 if (n > 0) {
388 (*outbuf) += n;
389 (*outbytesleft) -= n;
390 } else /* don't return immediately */
391 st->_errno = errno;
392
393 if (curState == S_EXT) st->keepc[1] = ISC_ext;
394 } else {
395 errno = EILSEQ;
396 return (size_t)-1;
397 }
398
399 break;
400 default: /* should never come here */
401 st->_errno = errno = EILSEQ;
402 st->pState = S_BASIC; /* reset state */
403 break;
404 }
405
406 st->pState = curState;
407
408 (*inbuf)++;
409 (*inbytesleft)--;
410
411 if (errno)
412 return(size_t)-1;
413 }
414
415 if (*inbytesleft > 0 && *outbytesleft == 0) {
416 /* in this case, the st->_errno is zero */
417 errno = E2BIG;
418 return(size_t)-1;
419 }
420
421 return (size_t)(*inbytesleft);
422 }
423