xref: /illumos-gate/usr/src/contrib/ast/src/lib/libast/string/wc2utf8.c (revision 36589d6bb0cdae89e166b57b0d64ae56d53247d9)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * convert wide character to utf8 in s
28  * s must have room for at least 6 bytes
29  * return value is the number of chars placed in s
30  * thanks Tom Duff
31  */
32 
33 #include <ast.h>
34 
/*
 * one row of the encoding table: describes a single utf8 sequence length
 */

typedef struct Utf8_s
{
	uint32_t	range;	/* row applies to values strictly below this */
	unsigned short	prefix;	/* marker bits or'd into the first byte */
	unsigned short	shift;	/* right shift that yields the first byte payload */
} Utf8_t;
41 
42 static const Utf8_t	ops[] =
43 {
44 	{ 0x00000080, 0x00,  0 },
45 	{ 0x00000800, 0xc0,  6 },
46 	{ 0x00010000, 0xe0, 12 },
47 	{ 0x00200000, 0xf0, 18 },
48 	{ 0x04000000, 0xf8, 24 },
49 	{ 0x80000000, 0xfc, 30 }
50 };
51 
52 int
53 wc2utf8(register char* s, register uint32_t w)
54 {
55 	register int	i;
56 	char*		b;
57 
58 	for (i = 0; i < elementsof(ops); i++)
59 		if (w < ops[i].range)
60 		{
61 			b = s;
62 			*s++ = ops[i].prefix | (w >> ops[i].shift);
63 			switch (ops[i].shift)
64 			{
65 			case 30:	*s++ = 0x80 | ((w >> 24) & 0x3f);
66 			/* FALLTHROUGH */
67 			case 24:	*s++ = 0x80 | ((w >> 18) & 0x3f);
68 			/* FALLTHROUGH */
69 			case 18:	*s++ = 0x80 | ((w >> 12) & 0x3f);
70 			/* FALLTHROUGH */
71 			case 12:	*s++ = 0x80 | ((w >>  6) & 0x3f);
72 			/* FALLTHROUGH */
73 			case  6:	*s++ = 0x80 | (w & 0x3f);
74 			}
75 			return s - b;
76 		}
77 	return 0;
78 }
79