xref: /titanic_51/usr/src/lib/iconv_modules/hi_IN/iscii91%UTF-8.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright(c) 2001 Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <strings.h>
30 #include "iscii.h"
31 
#define MSB        0x80    /* most significant bit */
#define ONEBYTE    0xff    /* right most byte */

/* UTF-8 encoding of U+FFFD, emitted for bytes that cannot be converted */
#define REPLACE_CHAR1  0xEF
#define REPLACE_CHAR2  0xBF
#define REPLACE_CHAR3  0xBD

/*
 * UTF8_SETnB(b, v): store the n-byte UTF-8 encoding of code point v into
 * b[0..n-1].  The caller guarantees that v fits the chosen length and that
 * b has room for n bytes.
 *
 * Every parameter is parenthesized so that arguments such as "x | y" are
 * encoded as a whole, and the multi-byte forms are wrapped in
 * do { } while (0) so that each behaves as a single statement (for
 * example as the body of an unbraced if/else).
 */
#define UTF8_SET1B(b,v)      \
    ((b)[0] = ((v) & 0x7f))

#define UTF8_SET2B(b,v)      \
    do { \
        (b)[0] = (0xc0 | (((v) >> 6) & 0x1f)); \
        (b)[1] = (0x80 | ((v) & 0x3f)); \
    } while (0)

#define UTF8_SET3B(b,v)      \
    do { \
        (b)[0] = (0xe0 | (((v) >> 12) & 0xf)); \
        (b)[1] = (0x80 | (((v) >> 6) & 0x3f)); \
        (b)[2] = (0x80 | ((v) & 0x3f)); \
    } while (0)
50 
51 typedef struct _icv_state {
52      char    keepc[3];    /* keepc[0] is attr, keepc[1] and keepc[2] are lookup-ed */
53      short   pState;      /* Previous State */
54      int    _errno;
55 } _iconv_st;
56 
57 enum _CSTATE { S_BASIC, S_ATR, S_EXT, S_NONE };
58 
59 #define have_nukta(isc_type) ( nukta_type[isc_type] != NULL )
60 #define have_EXT(isc_type) ( EXT_type[isc_type] != NULL )
61 #define FIRST_CHAR  0xA0
62 
63 static int copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen);
64 
65 static ucs_t
66 get_nukta(uchar iscii, int type)
67 {
68     int indx = iscii - FIRST_CHAR;
69     int *iscii_nukta = nukta_type[type];
70 
71     return ((indx >= 0) ? iscii_nukta[indx] : 0 );
72 }
73 
74 static ucs_t
75 get_EXT(uchar iscii, int type)
76 {
77     int indx = iscii - FIRST_CHAR;
78     int *iscii_EXT = EXT_type[type];
79 
80     return ((indx >= 0) ? iscii_EXT[indx] : 0 );
81 }
82 
83 static ucs_t
84 traverse_table(Entry *entry, int num,  uchar iscii)
85 {
86     int i=0;
87     ucs_t retucs=0;
88 
89     for ( ; i < num; ++i ) {
90         Entry en = entry[i];
91 
92         if ( iscii < en.iscii ) break;
93         if ( iscii >= en.iscii && iscii < en.iscii + en.count ) {
94              retucs = en.ucs + ( iscii - en.iscii );
95              break;
96         }
97     }
98 
99     return retucs;
100 }
101 
102 /*
103  * the copy_to_outbuf has to be called before the st->keepc needs to changed.
104  * if E2BIG error, keep st->keepc. Will flush it at the beginning of next
105  * _icv_iconv() invocation
106  */
107 int
108 iscii_to_utf8(_iconv_st *st, char *buf, size_t buflen)
109 {
110 #define DEV_ATR 0x42
111     ucs_t uniid;
112     int   nBytes=0;
113     ISCII isc_type = isc_TYPE[st->keepc[0] - DEV_ATR];
114     Entries en = iscii_table[isc_type];
115     /* unsigned int  keepc0 = (unsigned int) (st->keepc[0] & ONEBYTE); */
116     unsigned int  keepc1 = (unsigned int) (st->keepc[1] & ONEBYTE);
117     unsigned int  keepc2 = (unsigned int) (st->keepc[2] & ONEBYTE);
118 
119     if (keepc1 == 0xFF) { /* FFFD */
120         if ( buflen < 3 ) {
121             errno = E2BIG;
122             return 0;
123         }
124 
125         *buf = (char)REPLACE_CHAR1;
126         *(buf+1) = (char)REPLACE_CHAR2;
127         *(buf+2) = (char)REPLACE_CHAR3;
128         return (3);
129     }
130 
131     if (keepc2 == 0) { /* Flush Single Character */
132 
133         if (keepc1 & MSB) {    /* ISCII - Non-Ascii Codepoints */
134             uniid = traverse_table(en.entry, en.items, keepc1);
135         } else  /* ASCII */
136             uniid = keepc1;
137 
138         if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
139         st->keepc[1] = 0;
140 
141     } else {
142         /* keepc[1] and keepc[2] != 0 */
143         if (keepc1 & MSB) {
144 
145 	    switch (keepc1)
146 	     {
147 	      case ISC_ext:
148 
149 		if ( have_EXT(isc_type) && is_valid_ext_code(keepc2) )
150 		  {  /* EXT only supported in Devanagari script */
151 
152                      uniid = get_EXT(keepc2, isc_type);
153                      if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
154 		  }
155 		else
156 		     errno = EILSEQ;
157 
158 	        st->keepc[1] = st->keepc[2] = 0;
159 		break;
160 	      case ISC_halant:
161                 /* test whether there has enough space to hold the converted bytes */
162                 if ((keepc2 == ISC_halant || keepc2 == ISC_nukta) && buflen < 6 )
163                     goto E2big;
164 
165                 uniid = traverse_table(en.entry, en.items, keepc1);
166                 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
167                 st->keepc[1] = st->keepc[2];
168 
169                 if ( keepc2 == ISC_halant || keepc2 == ISC_nukta )
170                   {
171                      int nbytes_2 = 0;
172                      if (keepc2 == ISC_halant) uniid = UNI_ZWNJ; /* explicit Halant */
173                      if (keepc2 == ISC_nukta) uniid = UNI_ZWJ; /* soft Halant */
174 
175                      if ((nbytes_2 = copy_to_outbuf(uniid, buf+nBytes, buflen)) == 0) goto E2big;
176                      st->keepc[1] = st->keepc[2] = 0;
177 
178                      nBytes += nbytes_2;
179                   }
180 
181                 break;
182 	      case ISC_danda:
183 		if ( isc_type == DEV && keepc2 == ISC_danda )
184 		  { /* only in Devanagari script, it works */
185 		     uniid = UNI_DOUBLE_DANDA;
186                      if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
187                      st->keepc[1] = st->keepc[2] = 0;
188 
189 		     break;
190 		  }
191 
192 		/* fall into default case, convert the DANDA if it isn't DOUBLE_DANDA */
193 		/* FALLTHRU */
194 	      default:
195 
196 		uniid = traverse_table(en.entry, en.items, keepc1);
197 
198                 if ( have_nukta(isc_type) &&  keepc2 == ISC_nukta) {
199 		    /* then try to test whether it is Nukta Cases */
200                     int    ucs;
201 
202                     if (( ucs = get_nukta(keepc1, isc_type)) != 0 ) {
203 
204                        uniid = ucs;
205 
206                        if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
207                        st->keepc[1] = st->keepc[2] = 0;
208                     } else {
209                        if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
210                        st->keepc[1] = st->keepc[2];
211                     }
212                 } else {
213                     if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
214                     st->keepc[1] = st->keepc[2];
215                 }
216 		break;
217 	     } /* end of switch */
218         } else { /* ASCII */
219             uniid = keepc1;
220             if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big;
221             st->keepc[1] = st->keepc[2];
222         }
223         st->keepc[2] = 0;
224     }
225 
226 E2big:
227     return nBytes;
228 }
229 
230 static int
231 copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen)
232 {
233     if (uniid > 0) {
234         if (uniid <= 0x7f) {
235             if (buflen < 1) {
236                 errno = E2BIG;
237                 return(0);
238             }
239             UTF8_SET1B(buf, uniid);
240             return (1);
241         }
242 
243         if (uniid >= 0x80 && uniid <= 0x7ff) {
244             if (buflen < 2) {
245                 errno = E2BIG;
246                 return(0);
247             }
248             UTF8_SET2B(buf, uniid);
249             return (2);
250         }
251 
252         if (uniid >= 0x800 && uniid <= 0xffff) {
253             if (buflen < 3) {
254                 errno = E2BIG;
255                 return(0);
256             }
257             UTF8_SET3B(buf, uniid);
258             return (3);
259         }
260     } else { /* Replacement Character */
261         if ( buflen < 3 ) {
262             errno = E2BIG;
263             return 0;
264         }
265 
266         *buf = (char)REPLACE_CHAR1;
267         *(buf+1) = (char)REPLACE_CHAR2;
268         *(buf+2) = (char)REPLACE_CHAR3;
269         return (3);
270     }
271 
272     /* This code shouldn't be reached */
273     return (0);
274 }
275 
276 /*
277  * Open; called from iconv_open()
278  */
279 void *
280 _icv_open()
281 {
282     _iconv_st *st;
283 
284     if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) {
285         errno = ENOMEM;
286         return ((void*)-1);
287     }
288 
289     bzero(st, sizeof(_iconv_st));
290     st->keepc[0] = DEV_ATR;
291     st->pState = S_BASIC;
292 
293     return ((void*)st);
294 }
295 
296 /*
297  * Close; called from iconv_close()
298  */
299 void
300 _icv_close(_iconv_st *st)
301 {
302     if (!st)
303         errno = EBADF;
304     else
305         free(st);
306 }
307 
308 /*
309  * Conversion routine; called from iconv()
310  */
311 size_t
312 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
313        char **outbuf, size_t *outbytesleft)
314 {
315     int   n;
316     short curState;
317 
318     if (st == NULL) {
319         errno = EBADF;
320         return ((size_t) -1);
321     }
322 
323     if (inbuf == NULL || *inbuf == NULL) { /* Reset request */
324         st->keepc[0] = DEV_ATR;
325         st->pState = S_BASIC;
326         st->_errno = 0;
327         return ((size_t)0);
328     }
329 
330     /* flush if possible */
331     if ( st->_errno == E2BIG ) {
332         n = iscii_to_utf8(st, *outbuf, *outbytesleft);
333         (*outbuf) += n;
334         (*outbytesleft) -= n;
335     }
336 
337     st->_errno = errno = 0; /* reset internal and external errno */
338 
339     /* a state machine for interpreting ISCII code */
340     while (*inbytesleft > 0 && *outbytesleft > 0) {
341         unsigned int curChar = (unsigned int)(**inbuf & ONEBYTE);
342         unsigned int prevChar = (unsigned int)(st->keepc[1] & ONEBYTE);
343 
344         if (curChar == ISC_ext)
345             curState = S_EXT;
346         else if (curChar == ISC_atr)
347             curState = S_ATR;
348         else
349             curState = S_BASIC;
350 
351         switch (curState) {
352         case S_BASIC:
353             if (prevChar == 0)
354                 st->keepc[1] = curChar;
355             else
356                 st->keepc[2] = curChar;
357 
358             if (st->pState == S_ATR) {
359                 /* clear the keepc[1], which is part of attribute */
360                 st->keepc[1] = 0;
361                 /* change the attribute for Indian Script Fonts */
362                 if ((curChar >= 0x42) && (curChar <= 0x4b) && curChar != 0x46) {
363                     st->keepc[0] = curChar;
364                 }
365                 /* other attributes such as display attributes would be ignored */
366             } else { /* Handle Cases and Flush */
367 
368                 if ((curChar > 0 && curChar <= 0x7f) || prevChar != 0) {
369                     n=iscii_to_utf8(st, *outbuf, *outbytesleft);
370                     if (n > 0) {
371                         (*outbuf) += n;
372                         (*outbytesleft) -= n;
373                     } else   /* don't return immediately, need advance the *inbuf */
374                          st->_errno = errno;
375                 }
376             }
377             break;
378         case S_ATR:
379         case S_EXT: /* Do nothing */
380             if (st->pState == S_BASIC) { /* Flush */
381                 if ( st->keepc[1] == 0 )
382                  {
383                    if (curState == S_EXT) st->keepc[1] = ISC_ext;
384                    break;
385                  }
386                 n = iscii_to_utf8(st, *outbuf, *outbytesleft);
387                 if (n > 0) {
388                     (*outbuf) += n;
389                     (*outbytesleft) -= n;
390                 } else /* don't return immediately */
391                     st->_errno = errno;
392 
393                 if (curState == S_EXT) st->keepc[1] = ISC_ext;
394             } else {
395                 errno = EILSEQ;
396                 return (size_t)-1;
397             }
398 
399             break;
400         default:  /* should never come here */
401             st->_errno = errno = EILSEQ;
402             st->pState = S_BASIC;    /* reset state */
403             break;
404         }
405 
406         st->pState = curState;
407 
408         (*inbuf)++;
409         (*inbytesleft)--;
410 
411         if (errno)
412             return(size_t)-1;
413     }
414 
415     if (*inbytesleft > 0 && *outbytesleft == 0) {
416         /* in this case, the st->_errno is zero */
417         errno = E2BIG;
418         return(size_t)-1;
419     }
420 
421     return (size_t)(*inbytesleft);
422 }
423