1*9a4a12bdSRobert Mustacchi /* 2*9a4a12bdSRobert Mustacchi * This file and its contents are supplied under the terms of the 3*9a4a12bdSRobert Mustacchi * Common Development and Distribution License ("CDDL"), version 1.0. 4*9a4a12bdSRobert Mustacchi * You may only use this file in accordance with the terms of version 5*9a4a12bdSRobert Mustacchi * 1.0 of the CDDL. 6*9a4a12bdSRobert Mustacchi * 7*9a4a12bdSRobert Mustacchi * A full copy of the text of the CDDL should have accompanied this 8*9a4a12bdSRobert Mustacchi * source. A copy of the CDDL is also available via the Internet at 9*9a4a12bdSRobert Mustacchi * http://www.illumos.org/license/CDDL. 10*9a4a12bdSRobert Mustacchi */ 11*9a4a12bdSRobert Mustacchi 12*9a4a12bdSRobert Mustacchi /* 13*9a4a12bdSRobert Mustacchi * Copyright 2020 Robert Mustacchi 14*9a4a12bdSRobert Mustacchi */ 15*9a4a12bdSRobert Mustacchi 16*9a4a12bdSRobert Mustacchi /* 17*9a4a12bdSRobert Mustacchi * C11 c16rtomb(3C) support. 18*9a4a12bdSRobert Mustacchi * 19*9a4a12bdSRobert Mustacchi * Convert a series of char16_t values into a series of multi-byte characters. 20*9a4a12bdSRobert Mustacchi * We may be given a surrogate value, so we need to potentially store that in 21*9a4a12bdSRobert Mustacchi * the interim. 22*9a4a12bdSRobert Mustacchi */ 23*9a4a12bdSRobert Mustacchi 24*9a4a12bdSRobert Mustacchi #include <uchar.h> 25*9a4a12bdSRobert Mustacchi #include <errno.h> 26*9a4a12bdSRobert Mustacchi #include "mblocal.h" 27*9a4a12bdSRobert Mustacchi #include "unicode.h" 28*9a4a12bdSRobert Mustacchi 29*9a4a12bdSRobert Mustacchi static mbstate_t c16rtomb_state; 30*9a4a12bdSRobert Mustacchi 31*9a4a12bdSRobert Mustacchi size_t 32*9a4a12bdSRobert Mustacchi c16rtomb(char *restrict str, char16_t c16, mbstate_t *restrict ps) 33*9a4a12bdSRobert Mustacchi { 34*9a4a12bdSRobert Mustacchi char32_t c32; 35*9a4a12bdSRobert Mustacchi _CHAR16State *c16s; 36*9a4a12bdSRobert Mustacchi 37*9a4a12bdSRobert Mustacchi if (ps == NULL) { 38*9a4a12bdSRobert Mustacchi ps = &c16rtomb_state; 39*9a4a12bdSRobert Mustacchi } 40*9a4a12bdSRobert Mustacchi 41*9a4a12bdSRobert Mustacchi if (str == NULL) { 42*9a4a12bdSRobert Mustacchi c16 = L'\0'; 43*9a4a12bdSRobert Mustacchi } 44*9a4a12bdSRobert Mustacchi 45*9a4a12bdSRobert Mustacchi c16s = (_CHAR16State *)ps; 46*9a4a12bdSRobert Mustacchi if (c16s->c16_surrogate != 0) { 47*9a4a12bdSRobert Mustacchi if (c16 > UNICODE_SUR_MAX || c16 < UNICODE_SUR_MIN || 48*9a4a12bdSRobert Mustacchi (c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_LOWER) { 49*9a4a12bdSRobert Mustacchi errno = EILSEQ; 50*9a4a12bdSRobert Mustacchi return ((size_t)-1); 51*9a4a12bdSRobert Mustacchi } 52*9a4a12bdSRobert Mustacchi 53*9a4a12bdSRobert Mustacchi c32 = UNICODE_SUR_UVALUE(c16s->c16_surrogate) | 54*9a4a12bdSRobert Mustacchi UNICODE_SUR_LVALUE(c16); 55*9a4a12bdSRobert Mustacchi c32 += UNICODE_SUP_START; 56*9a4a12bdSRobert Mustacchi c16s->c16_surrogate = 0; 57*9a4a12bdSRobert Mustacchi } else if (c16 >= UNICODE_SUR_MIN && c16 <= UNICODE_SUR_MAX) { 58*9a4a12bdSRobert Mustacchi /* 59*9a4a12bdSRobert Mustacchi * The lower surrogate pair mask (dc00) overlaps the upper mask 60*9a4a12bdSRobert Mustacchi * (d800), hence why we do a binary and with the upper mask. 61*9a4a12bdSRobert Mustacchi */ 62*9a4a12bdSRobert Mustacchi if ((c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_UPPER) { 63*9a4a12bdSRobert Mustacchi errno = EILSEQ; 64*9a4a12bdSRobert Mustacchi return ((size_t)-1); 65*9a4a12bdSRobert Mustacchi } 66*9a4a12bdSRobert Mustacchi 67*9a4a12bdSRobert Mustacchi c16s->c16_surrogate = c16; 68*9a4a12bdSRobert Mustacchi return (0); 69*9a4a12bdSRobert Mustacchi } else { 70*9a4a12bdSRobert Mustacchi c32 = c16; 71*9a4a12bdSRobert Mustacchi } 72*9a4a12bdSRobert Mustacchi 73*9a4a12bdSRobert Mustacchi /* 74*9a4a12bdSRobert Mustacchi * Call c32rtomb() and not wcrtomb() so that way all of the unicode code 75*9a4a12bdSRobert Mustacchi * point validation is performed. 76*9a4a12bdSRobert Mustacchi */ 77*9a4a12bdSRobert Mustacchi return (c32rtomb(str, c32, ps)); 78*9a4a12bdSRobert Mustacchi } 79