/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Robert Mustacchi
 */

#ifndef _UNICODE_H
#define	_UNICODE_H

/*
 * Common definitions for dealing with Unicode.
 *
 * UTF-16 encodes data as a series of two byte values. However, there are more
 * code points than fit in 16 bits. Code points that fit in the first 16 bits
 * are referred to as existing in the 'basic multilingual plane' (BMP). Those
 * outside of it are in one of the 'supplementary planes'. When such a code
 * point is encountered, it is encoded as a series of two uint16_t values, a
 * surrogate pair.
 *
 * Supplementary code points range from 0x10000 to 0x10ffff. To encode one,
 * 0x10000 is first subtracted from it, which leaves an offset of at most 20
 * bits. That offset is split into two 10-bit values. The upper 10 bits are
 * ORed with 0xd800 and the lower 10 bits are ORed with 0xdc00. Decoding
 * reverses this: the two 10-bit values are recombined and 0x10000 is added
 * back.
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Range of Unicode code points reserved for surrogate characters.
 */
#define	UNICODE_SUR_MIN		0xd800
#define	UNICODE_SUR_MAX		0xdfff

/*
 * Range of Unicode code points in supplementary planes.
 */
#define	UNICODE_SUP_START	0x10000
#define	UNICODE_SUP_MAX		0x10ffff

/*
 * Starting constants for surrogate pairs. The upper (first) surrogate of a
 * pair is built on UNICODE_SUR_UPPER and the lower (second) surrogate is built
 * on UNICODE_SUR_LOWER.
 */
#define	UNICODE_SUR_UPPER	0xd800
#define	UNICODE_SUR_LOWER	0xdc00

/*
 * Macros to extract the value from a surrogate pair and to take a code point
 * and transform it into the surrogate version.
 *
 * UNICODE_SUR_UVALUE() and UNICODE_SUR_LVALUE() are for decoding. Given the
 * upper and lower surrogate respectively, they yield that surrogate's 10 bits
 * positioned where they belong in the 20-bit offset.
 *
 * UNICODE_SUR_UMASK() and UNICODE_SUR_LMASK() are for encoding. Given the
 * 20-bit offset, they yield the 10-bit value to OR with UNICODE_SUR_UPPER and
 * UNICODE_SUR_LOWER respectively.
 *
 * None of these macros apply the UNICODE_SUP_START bias. The caller must
 * subtract it from the code point before using the MASK macros and add it to
 * the combined result of the VALUE macros.
 */
#define	UNICODE_SUR_UVALUE(x)	(((x) & 0x3ff) << 10)
#define	UNICODE_SUR_LVALUE(x)	((x) & 0x3ff)
#define	UNICODE_SUR_UMASK(x)	(((x) >> 10) & 0x3ff)
#define	UNICODE_SUR_LMASK(x)	((x) & 0x3ff)

#ifdef __cplusplus
}
#endif

#endif /* _UNICODE_H */