/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Robert Mustacchi
 */

#ifndef _UNICODE_H
#define	_UNICODE_H

/*
 * Common definitions for dealing with Unicode.
 *
 * UTF-16 encodes data as a series of two byte values. However, there are more
 * code points than fit in 16 bits. Code points that fit in the first 16 bits
 * are referred to as existing in the 'basic multilingual plane' (BMP). Those
 * outside of it are in the 'supplementary planes'. When such a code point is
 * encountered, it is encoded as a series of two uint16_t values, known as a
 * surrogate pair.
 *
 * To encode a supplementary code point, 0x10000 is first subtracted from it.
 * Because the code point space currently ends at 0x10ffff, this leaves a value
 * of up to 20 bits, which is split into two 10-bit values. The upper 10 bits
 * are ORed with 0xd800 and the lower 10 bits are ORed with 0xdc00. Decoding
 * reverses these steps: the low 10 bits of each surrogate are recombined into
 * a 20-bit value and 0x10000 is added back.
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Range of Unicode code points reserved for surrogate characters.
 */
#define	UNICODE_SUR_MIN		0xd800
#define	UNICODE_SUR_MAX		0xdfff

/*
 * Range of Unicode code points in supplementary planes.
 */
#define	UNICODE_SUP_START	0x10000
#define	UNICODE_SUP_MAX		0x10ffff

/*
 * Starting constants for surrogate pairs.
 */
#define	UNICODE_SUR_UPPER	0xd800
#define	UNICODE_SUR_LOWER	0xdc00

/*
 * Macros to extract the value from a surrogate pair and to take a code point
 * and transform it into the surrogate version. None of these macros applies
 * the UNICODE_SUP_START offset or the UNICODE_SUR_UPPER / UNICODE_SUR_LOWER
 * base values; the caller is responsible for both. Each macro evaluates its
 * argument exactly once.
 *
 * Decoding a surrogate pair:
 *
 *	UNICODE_SUR_UVALUE(x)	The low 10 bits of the upper surrogate x,
 *				shifted up into bits 10-19 of the result.
 *	UNICODE_SUR_LVALUE(x)	The low 10 bits of the lower surrogate x.
 *
 * Encoding, where x is the code point less UNICODE_SUP_START:
 *
 *	UNICODE_SUR_UMASK(x)	Bits 10-19 of x, shifted down to bits 0-9;
 *				meant to be ORed with UNICODE_SUR_UPPER.
 *	UNICODE_SUR_LMASK(x)	Bits 0-9 of x; meant to be ORed with
 *				UNICODE_SUR_LOWER.
 */
#define	UNICODE_SUR_UVALUE(x)	(((x) & 0x3ff) << 10)
#define	UNICODE_SUR_LVALUE(x)	((x) & 0x3ff)
#define	UNICODE_SUR_UMASK(x)	(((x) >> 10) & 0x3ff)
#define	UNICODE_SUR_LMASK(x)	((x) & 0x3ff)

#ifdef __cplusplus
}
#endif

#endif /* _UNICODE_H */