/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Robert Mustacchi
 */

#ifndef _UNICODE_H
#define	_UNICODE_H

/*
 * Common definitions for dealing with Unicode.
 *
 * UTF-16 encodes data as a series of two byte values. However, there are more
 * code points than fit in 16 bits. Code points that fit in the first 16 bits
 * are referred to as existing in the 'basic multilingual plane' (BMP). Those
 * outside of it are in the 'supplementary planes'. When such a code point is
 * encountered, it is encoded as a series of two uint16_t values, known as a
 * surrogate pair.
 *
 * To build a surrogate pair, 0x10000 (UNICODE_SUP_START) is first subtracted
 * from the code point. As the largest code point is 0x10ffff
 * (UNICODE_SUP_MAX), the result is a value of at most 20 bits, which is split
 * into two 10-bit values. The upper 10 bits are ORed with 0xd800 and the lower
 * 10 bits are ORed with 0xdc00. Decoding reverses this: the two 10-bit values
 * are recombined and 0x10000 is added back.
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Range of Unicode code points reserved for surrogate characters.
 */
#define	UNICODE_SUR_MIN		0xd800
#define	UNICODE_SUR_MAX		0xdfff

/*
 * Range of Unicode code points in supplementary planes.
 */
#define	UNICODE_SUP_START	0x10000
#define	UNICODE_SUP_MAX		0x10ffff

/*
 * Starting constants for surrogate pairs. The upper (first) surrogate of a
 * pair is ORed with UNICODE_SUR_UPPER and the lower (second) surrogate is ORed
 * with UNICODE_SUR_LOWER.
 */
#define	UNICODE_SUR_UPPER	0xd800
#define	UNICODE_SUR_LOWER	0xdc00

/*
 * Macros to extract the value from a surrogate pair and to take a code point
 * and transform it into the surrogate version. Every macro masks its argument
 * down to 10 bits and evaluates it exactly once. None of them apply the
 * UNICODE_SUP_START offset; that is left to the caller.
 *
 * Decoding: UNICODE_SUR_UVALUE() takes the upper surrogate and yields its 10
 * payload bits shifted into bits 10-19. UNICODE_SUR_LVALUE() takes the lower
 * surrogate and yields its 10 payload bits in bits 0-9. ORing the two results
 * together and adding UNICODE_SUP_START produces the code point.
 *
 * Encoding: UNICODE_SUR_UMASK() and UNICODE_SUR_LMASK() take a value that has
 * already had UNICODE_SUP_START subtracted from it and yield bits 10-19 and
 * bits 0-9 respectively, ready to be ORed with UNICODE_SUR_UPPER and
 * UNICODE_SUR_LOWER.
 */
#define	UNICODE_SUR_UVALUE(x)	(((x) & 0x3ff) << 10)
#define	UNICODE_SUR_LVALUE(x)	((x) & 0x3ff)
#define	UNICODE_SUR_UMASK(x)	(((x) >> 10) & 0x3ff)
#define	UNICODE_SUR_LMASK(x)	((x) & 0x3ff)

#ifdef __cplusplus
}
#endif

#endif /* _UNICODE_H */