xref: /titanic_50/usr/src/lib/libc/port/locale/c16rtomb.c (revision 3fc10f8cbc2fd5dd5cd13044edf9cb68a1ef422b)
1*3fc10f8cSRobert Mustacchi /*
2*3fc10f8cSRobert Mustacchi  * This file and its contents are supplied under the terms of the
3*3fc10f8cSRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
4*3fc10f8cSRobert Mustacchi  * You may only use this file in accordance with the terms of version
5*3fc10f8cSRobert Mustacchi  * 1.0 of the CDDL.
6*3fc10f8cSRobert Mustacchi  *
7*3fc10f8cSRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
8*3fc10f8cSRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
9*3fc10f8cSRobert Mustacchi  * http://www.illumos.org/license/CDDL.
10*3fc10f8cSRobert Mustacchi  */
11*3fc10f8cSRobert Mustacchi 
12*3fc10f8cSRobert Mustacchi /*
13*3fc10f8cSRobert Mustacchi  * Copyright 2020 Robert Mustacchi
14*3fc10f8cSRobert Mustacchi  */
15*3fc10f8cSRobert Mustacchi 
16*3fc10f8cSRobert Mustacchi /*
17*3fc10f8cSRobert Mustacchi  * C11 c16rtomb(3C) support.
18*3fc10f8cSRobert Mustacchi  *
19*3fc10f8cSRobert Mustacchi  * Convert a series of char16_t values into a series of multi-byte characters.
20*3fc10f8cSRobert Mustacchi  * We may be given a surrogate value, so we need to potentially store that in
21*3fc10f8cSRobert Mustacchi  * the interim.
22*3fc10f8cSRobert Mustacchi  */
23*3fc10f8cSRobert Mustacchi 
24*3fc10f8cSRobert Mustacchi #include <uchar.h>
25*3fc10f8cSRobert Mustacchi #include <errno.h>
26*3fc10f8cSRobert Mustacchi #include "mblocal.h"
27*3fc10f8cSRobert Mustacchi #include "unicode.h"
28*3fc10f8cSRobert Mustacchi 
29*3fc10f8cSRobert Mustacchi static mbstate_t c16rtomb_state;
30*3fc10f8cSRobert Mustacchi 
31*3fc10f8cSRobert Mustacchi size_t
c16rtomb(char * restrict str,char16_t c16,mbstate_t * restrict ps)32*3fc10f8cSRobert Mustacchi c16rtomb(char *restrict str, char16_t c16, mbstate_t *restrict ps)
33*3fc10f8cSRobert Mustacchi {
34*3fc10f8cSRobert Mustacchi 	char32_t c32;
35*3fc10f8cSRobert Mustacchi 	_CHAR16State *c16s;
36*3fc10f8cSRobert Mustacchi 
37*3fc10f8cSRobert Mustacchi 	if (ps == NULL) {
38*3fc10f8cSRobert Mustacchi 		ps = &c16rtomb_state;
39*3fc10f8cSRobert Mustacchi 	}
40*3fc10f8cSRobert Mustacchi 
41*3fc10f8cSRobert Mustacchi 	if (str == NULL) {
42*3fc10f8cSRobert Mustacchi 		c16 = L'\0';
43*3fc10f8cSRobert Mustacchi 	}
44*3fc10f8cSRobert Mustacchi 
45*3fc10f8cSRobert Mustacchi 	c16s = (_CHAR16State *)ps;
46*3fc10f8cSRobert Mustacchi 	if (c16s->c16_surrogate != 0) {
47*3fc10f8cSRobert Mustacchi 		if (c16 > UNICODE_SUR_MAX || c16 < UNICODE_SUR_MIN ||
48*3fc10f8cSRobert Mustacchi 		    (c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_LOWER) {
49*3fc10f8cSRobert Mustacchi 			errno = EILSEQ;
50*3fc10f8cSRobert Mustacchi 			return ((size_t)-1);
51*3fc10f8cSRobert Mustacchi 		}
52*3fc10f8cSRobert Mustacchi 
53*3fc10f8cSRobert Mustacchi 		c32 = UNICODE_SUR_UVALUE(c16s->c16_surrogate) |
54*3fc10f8cSRobert Mustacchi 		    UNICODE_SUR_LVALUE(c16);
55*3fc10f8cSRobert Mustacchi 		c32 += UNICODE_SUP_START;
56*3fc10f8cSRobert Mustacchi 		c16s->c16_surrogate = 0;
57*3fc10f8cSRobert Mustacchi 	} else if (c16 >= UNICODE_SUR_MIN && c16 <= UNICODE_SUR_MAX) {
58*3fc10f8cSRobert Mustacchi 		/*
59*3fc10f8cSRobert Mustacchi 		 * The lower surrogate pair mask (dc00) overlaps the upper mask
60*3fc10f8cSRobert Mustacchi 		 * (d800), hence why we do a binary and with the upper mask.
61*3fc10f8cSRobert Mustacchi 		 */
62*3fc10f8cSRobert Mustacchi 		if ((c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_UPPER) {
63*3fc10f8cSRobert Mustacchi 			errno = EILSEQ;
64*3fc10f8cSRobert Mustacchi 			return ((size_t)-1);
65*3fc10f8cSRobert Mustacchi 		}
66*3fc10f8cSRobert Mustacchi 
67*3fc10f8cSRobert Mustacchi 		c16s->c16_surrogate = c16;
68*3fc10f8cSRobert Mustacchi 		return (0);
69*3fc10f8cSRobert Mustacchi 	} else {
70*3fc10f8cSRobert Mustacchi 		c32 = c16;
71*3fc10f8cSRobert Mustacchi 	}
72*3fc10f8cSRobert Mustacchi 
73*3fc10f8cSRobert Mustacchi 	/*
74*3fc10f8cSRobert Mustacchi 	 * Call c32rtomb() and not wcrtomb() so that way all of the unicode code
75*3fc10f8cSRobert Mustacchi 	 * point validation is performed.
76*3fc10f8cSRobert Mustacchi 	 */
77*3fc10f8cSRobert Mustacchi 	return (c32rtomb(str, c32, ps));
78*3fc10f8cSRobert Mustacchi }
79