/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2020 Robert Mustacchi
 */

/*
 * C11 mbrtoc16(3C) support.
 *
 * The char16_t represents a UTF-16 encoding. This means that we have to deal
 * with surrogate pairs.
 */

#include <locale.h>
#include <wchar.h>
#include <xlocale.h>
#include <uchar.h>
#include "mblocal.h"
#include "unicode.h"

#include <sys/debug.h>

/*
 * Ensure that we never cause our save state to ever exceed that of the
 * mbstate_t. See the block comment in mblocal.h.
35*9a4a12bdSRobert Mustacchi */ 36*9a4a12bdSRobert Mustacchi CTASSERT(sizeof (_CHAR16State) <= sizeof (mbstate_t)); 37*9a4a12bdSRobert Mustacchi 38*9a4a12bdSRobert Mustacchi static mbstate_t mbrtoc16_state; 39*9a4a12bdSRobert Mustacchi 40*9a4a12bdSRobert Mustacchi size_t 41*9a4a12bdSRobert Mustacchi mbrtoc16(char16_t *restrict pc16, const char *restrict str, size_t len, 42*9a4a12bdSRobert Mustacchi mbstate_t *restrict ps) 43*9a4a12bdSRobert Mustacchi { 44*9a4a12bdSRobert Mustacchi wchar_t wc; 45*9a4a12bdSRobert Mustacchi size_t ret; 46*9a4a12bdSRobert Mustacchi char16_t out; 47*9a4a12bdSRobert Mustacchi _CHAR16State *c16s; 48*9a4a12bdSRobert Mustacchi 49*9a4a12bdSRobert Mustacchi if (ps == NULL) { 50*9a4a12bdSRobert Mustacchi ps = &mbrtoc16_state; 51*9a4a12bdSRobert Mustacchi } 52*9a4a12bdSRobert Mustacchi 53*9a4a12bdSRobert Mustacchi if (str == NULL) { 54*9a4a12bdSRobert Mustacchi pc16 = NULL; 55*9a4a12bdSRobert Mustacchi str = ""; 56*9a4a12bdSRobert Mustacchi len = 1; 57*9a4a12bdSRobert Mustacchi } 58*9a4a12bdSRobert Mustacchi 59*9a4a12bdSRobert Mustacchi c16s = (_CHAR16State *)ps; 60*9a4a12bdSRobert Mustacchi if (c16s->c16_surrogate != 0) { 61*9a4a12bdSRobert Mustacchi if (pc16 != NULL) { 62*9a4a12bdSRobert Mustacchi *pc16 = c16s->c16_surrogate; 63*9a4a12bdSRobert Mustacchi } 64*9a4a12bdSRobert Mustacchi c16s->c16_surrogate = 0; 65*9a4a12bdSRobert Mustacchi return ((size_t)-3); 66*9a4a12bdSRobert Mustacchi } 67*9a4a12bdSRobert Mustacchi 68*9a4a12bdSRobert Mustacchi ret = mbrtowc_l(&wc, str, len, ps, uselocale(NULL)); 69*9a4a12bdSRobert Mustacchi if ((ssize_t)ret < 0) { 70*9a4a12bdSRobert Mustacchi return (ret); 71*9a4a12bdSRobert Mustacchi } 72*9a4a12bdSRobert Mustacchi 73*9a4a12bdSRobert Mustacchi /* 74*9a4a12bdSRobert Mustacchi * If this character is not in the basic multilingual plane then we need 75*9a4a12bdSRobert Mustacchi * a surrogate character to represent it in UTF-16 and we will need to 76*9a4a12bdSRobert Mustacchi * write that out on the next 
iteration. 77*9a4a12bdSRobert Mustacchi */ 78*9a4a12bdSRobert Mustacchi if (wc >= UNICODE_SUP_START) { 79*9a4a12bdSRobert Mustacchi wc -= UNICODE_SUP_START; 80*9a4a12bdSRobert Mustacchi c16s->c16_surrogate = UNICODE_SUR_LOWER | UNICODE_SUR_LMASK(wc); 81*9a4a12bdSRobert Mustacchi out = UNICODE_SUR_UPPER | UNICODE_SUR_UMASK(wc); 82*9a4a12bdSRobert Mustacchi } else { 83*9a4a12bdSRobert Mustacchi out = (char16_t)wc; 84*9a4a12bdSRobert Mustacchi } 85*9a4a12bdSRobert Mustacchi 86*9a4a12bdSRobert Mustacchi if (pc16 != NULL) { 87*9a4a12bdSRobert Mustacchi *pc16 = out; 88*9a4a12bdSRobert Mustacchi } 89*9a4a12bdSRobert Mustacchi 90*9a4a12bdSRobert Mustacchi return (ret); 91*9a4a12bdSRobert Mustacchi } 92