xref: /titanic_52/usr/src/lib/iconv_modules/ko/common/byte_to_comb.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /* Copyright 1995 by Sun Microsystems, Inc.
22  * All rights are reserved.
23  */
24 
25 #include <stdio.h>
26 #include <string.h>
27 #include "kdefs.h"
28 #include "ktable.h"
29 
30 int input_typ(char c);
31 
/*
 * Per-conversion state handed to getc_12().
 *
 * getc_12() drains temp_ibuf[istart .. iend-1] before it touches the
 * caller's input buffer, and resets ibuf_left/istart/iend to 0 once the
 * last buffered byte has been consumed.  The output-side members are not
 * referenced in this file (presumably used by the caller -- TODO confirm).
 */
struct _cv_state {
	char	temp_ibuf[5];	/* input bytes carried over between calls */
	int	ibuf_left;	/* non-zero while temp_ibuf has unread bytes */
	int	istart;		/* index of the next unread byte in temp_ibuf */
	int	iend;		/* index one past the last byte in temp_ibuf */
	char	temp_obuf[1];	/* output-side scratch; not used in this file */
	int	flush_obuf;	/* output-side flag; not used in this file */
};
39 
40 KCHAR packtocomp(KCHAR comb2);
41 
42 #ifndef SUNVIEW
43 char vowel_mix(char c1,char c2);
44 #endif
45 
46 /*
47  * Hangul 7-bit(KS C 5601) to Standard 2-byte Combination code(87-3)
48  */
49 
50 
/*
 * File-level state of the Hangul automaton driven by getc_12().
 */

/* Current automaton state; indexes the rows of next_stat[] / next_act[]. */
static int cur_stat = 1;

/* Action selected for the byte most recently fed to the automaton. */
static int cur_act = 0;

/*
 * Syllable under construction, as consumed by make_2():
 *   [0] scratch (getc_12() parks the previous vowel here while it tries
 *       a compound vowel)
 *   [1] initial consonant   [2] vowel
 *   [3] final consonant     [4] second final consonant
 */
static char han_buf[5] = { 0 };

/*
 * When getc_12() produces two codes from one input byte it returns the
 * first and parks the second in han_temp with temp_flag set to 1; the
 * next call returns han_temp and clears the flag.
 */
static int temp_flag = 0;
static int han_temp = 0;
59 
/*
 * State transition table: next_stat[current state][input class].
 *
 * The column index is the value returned by input_typ() for the byte just
 * read (1-10 consonant classes, 11-17 vowel classes, 18 Ctrl-N, 19
 * Ctrl-O/Ctrl-T, 20 anything else).  input_typ() never returns 0 and no
 * entry leads to state 0, so row 0 and column 0 are unused padding.
 * The automaton starts in state 1 (see cur_stat).
 *
 * The table is only ever read, hence const.
 */
static const int next_stat[14][21]={
	/* input
	  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 */
/*state*/
/* 0 */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* 1 */	{ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1},
/* 2 */	{ 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 1, 2},
/* 3 */ { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 1, 2},
/* 4 */ { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2},
/* 5 */ { 0, 4, 9,10,11,12,13,13,13,13,13, 3, 8, 3, 8, 3, 3, 3, 2, 1, 2},
/* 6 */ { 0, 4, 9,10,11,12,13,13,13,13,13, 3, 3, 8, 8, 3, 3, 3, 2, 1, 2},
/* 7 */ { 0, 4, 9,10,11,12,13,13,13,13,13, 3, 3, 3, 8, 3, 3, 3, 2, 1, 2},
/* 8 */ { 0, 4, 9,10,11,12,13,13,13,13,13, 3, 3, 3, 3, 3, 3, 3, 2, 1, 2},
/* 9 */ { 0, 4, 4, 4, 4, 4, 4,13, 4, 4, 4, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2},
/*10 */ { 0, 4, 4, 4, 4, 4, 4, 4,13,13, 4, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2},
/*11 */ { 0, 4,13, 4, 4,13, 4,13, 4,13,13, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2},
/*12 */ { 0, 4, 4, 4, 4, 4, 4,13, 4, 4, 4, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2},
/*13 */ { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 5, 6, 7, 2, 1, 2}
};
79 
/*
 * Action table: next_act[current state][input class].
 *
 * Looked up with the same indices as next_stat[] (state before the
 * transition, input class from input_typ()); the resulting number selects
 * a case in the switch inside getc_12().  Action 0 appears only in the
 * unused row 0 / column 0 and action 11 appears nowhere in the table.
 *
 * The table is only ever read, hence const.
 */
static const int next_act[14][21]={
	/*input
	  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 */
/*state*/
/* 0 */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* 1 */ { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 1, 4},/*4-1*/
/* 2 */ { 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,15,15,15,15,15,15,15, 1, 3, 4},
/* 3 */ { 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,15,15,15,15,15,15,15, 1, 3, 4},
/* 4 */ { 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6,16,12,13},
/* 5 */ { 0, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7,10,14,10,14,10,10,10,16,12,13},
/* 6 */ { 0, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7,10,10,14,14,10,10,10,16,12,13},
/* 7 */ { 0, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7,10,10,10,14,10,10,10,16,12,13},
/* 8 */ { 0, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7,10,10,10,10,10,10,10,16,12,13},
/* 9 */ { 0, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9,17,17,17,17,17,17,17,16,12,13},
/*10 */ { 0, 9, 9, 9, 9, 9, 9, 9, 8, 8, 9,17,17,17,17,17,17,17,16,12,13},
/*11 */ { 0, 9, 8, 9, 9, 8, 9, 8, 9, 8, 8,17,17,17,17,17,17,17,16,12,13},
/*12 */ { 0, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9,17,17,17,17,17,17,17,16,12,13},
/*13 */ { 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,17,17,17,17,17,17,17,16,12,13}
};
99 
100 KCHAR
101 getc_12(my_inbuf, my_inbytesleft, st)
102 char **my_inbuf;
103 size_t *my_inbytesleft;
104 struct _cv_state *st;
105 {
106 	register char	c;		/* input character */
107 	register int	cur_input;	/* type of input character */
108 	register KCHAR	code_2;		/* 2-byte char converted */
109 	KCHAR		make_2();
110 
111 	if(temp_flag == 1){
112 		code_2 = han_temp;
113 		temp_flag = 0;
114 		return(code_2);
115 	}
116 	for(;;){		/* read 1 byte */
117 		if (st->ibuf_left) {
118 			c = st->temp_ibuf[st->istart];
119 			st->istart++;
120 			if (st->istart >= st->iend) {
121 				st->ibuf_left = 0;
122 				st->istart = 0;
123 				st->iend = 0;
124 			}
125 		} else {
126 			c = **my_inbuf;
127 			(*my_inbuf)++, (*my_inbytesleft)--;
128 		}
129 				/* run Hangul automata */
130 		cur_input = input_typ(c);
131 		cur_act = next_act[cur_stat][cur_input];
132 		cur_stat = next_stat[cur_stat][cur_input];
133 		switch (cur_act) {
134 			case 1:
135 				break;
136 			case 2:
137 				break;
138 			case 3:
139 				break;
140 			case 4:
141 				return(0x0000 | c);
142 			case 5:
143 				han_buf[1] = c;
144 				break;
145 			case 6:
146 				han_buf[2] = c;
147 				if((code_2=packtocomp(make_2(2))) == 0xFFFF){
148 					han_buf[2] = 0;
149 					code_2 = make_2(0);
150 					if (st->ibuf_left) {
151 						st->istart--;
152 					} else {
153 						(*my_inbuf)--, (*my_inbytesleft)++;
154 					}
155 					cur_stat = 2;
156 					return(code_2);
157 				}
158 				break;
159 			case 7:
160 				han_buf[3] = c;
161 				if((code_2=packtocomp(make_2(2))) == 0xFFFF){
162 					han_buf[3] = 0;
163 					code_2 = make_2(0);
164 					if (st->ibuf_left) {
165 						st->istart--;
166 					} else {
167 						(*my_inbuf)--, (*my_inbytesleft)++;
168 					}
169 					cur_stat = 2;
170 					return(code_2);
171 				}
172 				break;
173 			case 8:
174 				han_buf[4] = c;
175 				if((code_2=packtocomp(make_2(2))) == 0xFFFF){
176 					han_buf[4] = 0;
177 					code_2 = make_2(0);
178 					cur_stat = 2;
179 					if (st->ibuf_left) {
180 						st->istart--;
181 					} else {
182 						(*my_inbuf)--, (*my_inbytesleft)++;
183 					}
184 					return(code_2);
185 				}
186 				break;
187 			case 9:
188 				code_2 = make_2(0);
189 				han_buf[1] = c;
190 				return(code_2);
191 			case 10:
192 				code_2 = make_2(0);
193 				han_buf[2] = c;
194 				han_temp = make_2(0);
195 				temp_flag = 1;
196 				return(code_2);
197 			case 11:			/* Unused */
198 				return(make_2(0));
199 			case 12:
200 				return(make_2(0));
201 			case 13:
202 				code_2 = make_2(0);
203 				han_temp = (0x0000 | c);
204 				temp_flag = 1;
205 				return(code_2);
206 			case 14:
207 				han_buf[0] = han_buf[2]; /* Save */
208 				han_buf[2] = vowel_mix(han_buf[2],c);
209 				if((code_2=packtocomp(make_2(2))) == 0xFFFF){
210 					han_buf[2] = han_buf[0]; /* Recover */
211 					code_2 = make_2(0);
212 					if (st->ibuf_left) {
213 						st->istart--;
214 					} else {
215 						(*my_inbuf)--, (*my_inbytesleft)++;
216 					}
217 					cur_stat = 2;
218 					return(code_2);
219 				}
220 				break;
221 			case 15:
222 				han_buf[2] = c;
223 				return(make_2(0));
224 			case 16:
225 				return(make_2(0));
226 			case 17:
227 				code_2 = make_2(1);
228 				han_buf[2] = c;
229 				return(code_2);
230 			default:
231 				break;
232 		}
233 	}
234 }
235 
236 int input_typ(char c)
237 {
238 	switch(c) {
239 		case D_DI_GUD:	/* double di-gud	0x48 'H' */
240 		case D_BI_UB:	/* double bi-ub		0x52 'S' */
241 		case D_JI_UD:	/* double ji-ud		0x59 'Y' */
242 			return(1);
243 
244 		case GI_UG:	/* gi-ug 		0x41 'A' */
245 			return(2);
246 
247 		case NI_UN:	/* ni-un		0x44 'D' */
248 			return(3);
249 
250 		case RI_UL:	/* ri-ul		0x49 'I' */
251 			return(4);
252 
253 		case BI_UB:	/* bi-ub		0x52 'R' */
254 			return(5);
255 
256 		case D_GI_UG:	/* double gi-ug		0x42 'B' */
257 		case DI_GUD:	/* di-gud		0x47 'G' */
258 		case D_SI_OD:	/* double si-od		0x56 'V' */
259 		case YI_UNG:	/* yi-ung		0x57 'W' */
260 		case CHI_UD:	/* chi-ud		0x5a 'Z' */
261 		case KI_UK:	/* ki-uk		0x5b '[' */
262 			return(6);
263 
264 		case SI_OD:	/* si-od		0x55 'U' */
265 			return(7);
266 
267 		case JI_UD:	/* ji_ud		0x58 'X' */
268 			return(8);
269 
270 		case HI_UD:	/* hi-ud		0x5e '^' */
271 			return(9);
272 
273 		case MI_UM:	/* mi-um		0x51 'Q' */
274 		case PI_UP:	/* pi-up		0x5d ']' */
275 		case TI_GUT:	/* ti-gut		0x51 '\' */
276 			return(10);
277 
278 		case YEA:	/* yea 			0x6b 'k' */
279 		case IA:	/* ia			0x64 'd' */
280 		case IYAI:	/* iyai			0x65 'e' */
281 		case IE:	/* ie			0x6a 'j' */
282 		case YO:	/* yo			0x72 'r' */
283 		case YU:	/* yu			0x77 'g' */
284 			return(11);
285 
286 		case A:		/* a			0x62 'b' */
287 		case AE:	/* ae			0x63 'c' */
288 			return(12);
289 
290 		case E:		/* e			0x66 'f' */
291 		case EA:	/* ea			0x67 'g' */
292 			return(13);
293 
294 		case I:		/* i			0x7c '|' */
295 			return(14);
296 
297 		case O:		/* o			0x6c 'l' */
298 			return(15);
299 
300 		case U:		/* u			0x73 's' */
301 			return(16);
302 
303 		case EU:	/* eu			0x7a 'z' */
304 			return(17);
305 
306 		default:
307 			if(c == '\016')	/* Ctrl-N Hangul delimiter */
308 				return(18);
309 			if(c == '\017' || c == '\024')	/* Ctrl-O Ctrl-T English delimiter */
310 				return(19);
311 			return(20);
312 	}
313 }
314 
315 /* This routine make 2-byte code from hangul buffer, if parameter (1)
316     is given, han_buf[4] or han_buf[3] is eliminated before making a
317     2-byte code and inserted han_buf[1] after 2-byte code is made */
318 
319 KCHAR make_2(n)
320 register int n;
321 {
322 	register KCHAR code_2 = 0;
323 	register char tmp = 0;
324 	register int i;
325 
326 			/* if n = 1, save han_buf[3] or han_buf[4] */
327 	if (n == 1) {
328 		if(han_buf[4]){
329 			tmp = han_buf[4];
330 			han_buf[4] = 0;
331 		} else{
332 			tmp = han_buf[3];
333 			han_buf[3] = 0;
334 		}
335 	}
336 
337 	if(han_buf[1] > BEG_OF_CONSO){
338 		code_2 = code_2 | X32_19[han_buf[1] - BEG_OF_CONSO];
339 	} else {
340 		code_2 = 0x9;
341 	}
342 
343 	if(han_buf[2] > BEG_OF_VOW){
344 		code_2 = ((code_2 << 5) | X32_21[han_buf[2] - BEG_OF_VOW]);
345 	} else{
346 		code_2 = (code_2 << 5) | 0x1;
347 	}
348 
349 	if(han_buf[3] > BEG_OF_CONSO){
350 		code_2 = ((code_2 << 5) | X32_28[han_buf[3] - BEG_OF_CONSO]);
351 	} else {
352 		code_2 = code_2 << 5 | 0x01;
353 	}
354 
355 	if(han_buf[4] > BEG_OF_CONSO){
356 		switch(han_buf[3]){
357 				/* process gi-ug si-od */
358 			case GI_UG:
359 				if(han_buf[4] == SI_OD){
360 					code_2 += 2;
361 				}
362 				break;
363 
364 				/* process ni-un zi-ud, ni-un hi-ud */
365 			case NI_UN:
366 				switch (han_buf[4]) {
367 				case JI_UD:
368 					code_2++;
369 					break;
370 				case HI_UD:
371 					code_2 += 2;
372 					break;
373 				default:
374 					break;
375 				}
376 				break;
377 
378 				/* process ri-ul gi-ug, ri-ul mi-um,
379 				     ri-ul bi-ub, ri-ul si-od, ri-ul ti-ut,
380 				     ri-ul pi-up, ri-ul hi-ud */
381 			case RI_UL:
382 				switch (han_buf[4]) {
383 				case GI_UG:
384 					code_2++;
385 					break;
386 
387 				case MI_UM:
388 					code_2 += 2;
389 					break;
390 
391 				case BI_UB:
392 					code_2 += 3;
393 					break;
394 
395 				case SI_OD:
396 					code_2 += 4;
397 					break;
398 
399 				case TI_GUT:
400 					code_2 += 5;
401 					break;
402 
403 				case PI_UP:
404 					code_2 += 6;
405 					break;
406 
407 				case HI_UD:
408 					code_2 += 7;
409 					break;
410 
411 				default:
412 					break;
413 				}
414 				break;
415 
416 				/* process bi-ub si-od */
417 			case BI_UB:
418 				if(han_buf[4] == SI_OD){
419 					code_2++;
420 				}
421 				break;
422 
423 				/* process si-od si-od */
424 			case SI_OD:
425 				if(han_buf[4] == SI_OD){
426 					code_2++;
427 				}
428 				break;
429 		}
430 	}
431 
432 				/* set 1st 7-bit of code_2 */
433 	code_2 = code_2 | 0x8000;
434 
435 				/* initialize Hangul buffer */
436 	if(n != 2)
437 		for(i = 0; i < 5; i++){
438 			han_buf[i] = 0;
439 		}
440 
441 	if(n == 1){			/* restore Hangul temporary */
442 		han_buf[1] = tmp;
443 	}
444 
445 	return(code_2);
446 }
447 
/* This routine makes a double vowel from the buffered vowel c1 (han_buf[2] at the call site) and the input character c2 */
449 
450 #ifndef SUNVIEW
451 char vowel_mix(char c1,char c2)
452 {
453 	register char c = '\0';	/* result double vowel */
454 
455 	switch(c1){
456 				/* process o-a, o-ae, o-i */
457 		case O:
458 			switch (c2) {
459 			case A:
460 				c = c1 + 1;
461 				break;
462 			case AE:
463 				c = c1 + 2;
464 				break;
465 			case I:
466 				c = c1 + 3;
467 				break;
468 			}
469 			break;
470 
471 				/* process u-oe, u-e, u-i */
472 		case U:
473 			switch (c2) {
474 			case E:
475 				c = c1 + 1;
476 				break;
477 			case EA:
478 				c = c1 + 2;
479 				break;
480 			case I:
481 				c = c1 + 3;
482 				break;
483 			}
484 			break;
485 
486 				/* process eu-i */
487 		case EU:
488 			if(c2 == I){
489 				c = c1 + 1;
490 			}
491 			break;
492 	}
493 	return(c);
494 }
495 #endif
496