1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2020 Robert Mustacchi 14 */ 15 16 /* 17 * C11 mbrtoc16(3C) support. 18 * 19 * The char16_t represents a UTF-16 encoding. This means that we have to deal 20 * with surrogate pairs. 21 */ 22 23 #include <locale.h> 24 #include <wchar.h> 25 #include <xlocale.h> 26 #include <uchar.h> 27 #include "mblocal.h" 28 #include "unicode.h" 29 30 #include <sys/debug.h> 31 32 /* 33 * Ensure that we never cause our save state to ever exceed that of the 34 * mbstate_t. See the block comment in mblocal.h. 35 */ 36 CTASSERT(sizeof (_CHAR16State) <= sizeof (mbstate_t)); 37 38 static mbstate_t mbrtoc16_state; 39 40 size_t 41 mbrtoc16(char16_t *restrict pc16, const char *restrict str, size_t len, 42 mbstate_t *restrict ps) 43 { 44 wchar_t wc; 45 size_t ret; 46 char16_t out; 47 _CHAR16State *c16s; 48 49 if (ps == NULL) { 50 ps = &mbrtoc16_state; 51 } 52 53 if (str == NULL) { 54 pc16 = NULL; 55 str = ""; 56 len = 1; 57 } 58 59 c16s = (_CHAR16State *)ps; 60 if (c16s->c16_surrogate != 0) { 61 if (pc16 != NULL) { 62 *pc16 = c16s->c16_surrogate; 63 } 64 c16s->c16_surrogate = 0; 65 return ((size_t)-3); 66 } 67 68 ret = mbrtowc_l(&wc, str, len, ps, uselocale(NULL)); 69 if ((ssize_t)ret < 0) { 70 return (ret); 71 } 72 73 /* 74 * If this character is not in the basic multilingual plane then we need 75 * a surrogate character to represent it in UTF-16 and we will need to 76 * write that out on the next iteration. 77 */ 78 if (wc >= UNICODE_SUP_START) { 79 wc -= UNICODE_SUP_START; 80 c16s->c16_surrogate = UNICODE_SUR_LOWER | UNICODE_SUR_LMASK(wc); 81 out = UNICODE_SUR_UPPER | UNICODE_SUR_UMASK(wc); 82 } else { 83 out = (char16_t)wc; 84 } 85 86 if (pc16 != NULL) { 87 *pc16 = out; 88 } 89 90 return (ret); 91 } 92