/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Robert Mustacchi
 */

#ifndef _UNICODE_H
#define	_UNICODE_H

/*
 * Common definitions for dealing with Unicode.
 *
 * UTF-16 encodes data as a series of two byte values. However, there are more
 * code points than fit in 16 bits. Code points that fit in the first 16 bits
 * are referred to as existing in the 'basic multilingual plane' (BMP). Those
 * outside of it are in the 'supplementary planes'. When such a code point is
 * encountered, it is encoded as a series of two uint16_t values, known as a
 * surrogate pair.
 *
 * To form a surrogate pair, UNICODE_SUP_START (0x10000) is first subtracted
 * from the code point. Because code points end at UNICODE_SUP_MAX (0x10ffff),
 * the result is a value of up to 20 bits. That value is split into two 10-bit
 * halves: the upper 10 bits are ORed with 0xd800 and the lower 10 bits are
 * ORed with 0xdc00.
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Range of Unicode code points reserved for surrogate characters.
 */
#define	UNICODE_SUR_MIN		0xd800
#define	UNICODE_SUR_MAX		0xdfff

/*
 * Range of Unicode code points in supplementary planes.
 */
#define	UNICODE_SUP_START	0x10000
#define	UNICODE_SUP_MAX		0x10ffff

/*
 * Starting constants for surrogate pairs.
 */
#define	UNICODE_SUR_UPPER	0xd800
#define	UNICODE_SUR_LOWER	0xdc00

/*
 * Macros to extract the value from a surrogate pair and to take a code point
 * and transform it into the surrogate version.
 *
 * UNICODE_SUR_UVALUE(x) and UNICODE_SUR_LVALUE(x) take the upper and lower
 * surrogate respectively and yield that surrogate's 10 payload bits, with the
 * upper surrogate's bits already shifted into bits 10-19. ORing the two
 * results together gives the 20-bit value.
 *
 * UNICODE_SUR_UMASK(x) and UNICODE_SUR_LMASK(x) take the 20-bit value and
 * yield its upper and lower 10 bits, ready to be ORed with UNICODE_SUR_UPPER
 * and UNICODE_SUR_LOWER respectively.
 *
 * None of these macros applies the UNICODE_SUP_START offset. The caller must
 * subtract it before using the MASK macros and add it back after combining the
 * VALUE macros. Each macro evaluates its argument exactly once.
 */
#define	UNICODE_SUR_UVALUE(x)	(((x) & 0x3ff) << 10)
#define	UNICODE_SUR_LVALUE(x)	((x) & 0x3ff)
#define	UNICODE_SUR_UMASK(x)	(((x) >> 10) & 0x3ff)
#define	UNICODE_SUR_LMASK(x)	((x) & 0x3ff)

#ifdef __cplusplus
}
#endif

#endif /* _UNICODE_H */