xref: /illumos-gate/usr/src/lib/libc/port/locale/c16rtomb.c (revision 78801af7286cd73dbc996d470f789e75993cf15d)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Robert Mustacchi
14  */
15 
16 /*
17  * C11 c16rtomb(3C) support.
18  *
19  * Convert a series of char16_t values into a series of multi-byte characters.
20  * We may be given a surrogate value, so we need to potentially store that in
21  * the interim.
22  */
23 
24 #include <uchar.h>
25 #include <errno.h>
26 #include "mblocal.h"
27 #include "unicode.h"
28 
29 static mbstate_t c16rtomb_state;
30 
31 size_t
32 c16rtomb(char *restrict str, char16_t c16, mbstate_t *restrict ps)
33 {
34 	char32_t c32;
35 	_CHAR16State *c16s;
36 
37 	if (ps == NULL) {
38 		ps = &c16rtomb_state;
39 	}
40 
41 	if (str == NULL) {
42 		c16 = L'\0';
43 	}
44 
45 	c16s = (_CHAR16State *)ps;
46 	if (c16s->c16_surrogate != 0) {
47 		if (c16 > UNICODE_SUR_MAX || c16 < UNICODE_SUR_MIN ||
48 		    (c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_LOWER) {
49 			errno = EILSEQ;
50 			return ((size_t)-1);
51 		}
52 
53 		c32 = UNICODE_SUR_UVALUE(c16s->c16_surrogate) |
54 		    UNICODE_SUR_LVALUE(c16);
55 		c32 += UNICODE_SUP_START;
56 		c16s->c16_surrogate = 0;
57 	} else if (c16 >= UNICODE_SUR_MIN && c16 <= UNICODE_SUR_MAX) {
58 		/*
59 		 * The lower surrogate pair mask (dc00) overlaps the upper mask
60 		 * (d800), hence why we do a binary and with the upper mask.
61 		 */
62 		if ((c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_UPPER) {
63 			errno = EILSEQ;
64 			return ((size_t)-1);
65 		}
66 
67 		c16s->c16_surrogate = c16;
68 		return (0);
69 	} else {
70 		c32 = c16;
71 	}
72 
73 	/*
74 	 * Call c32rtomb() and not wcrtomb() so that way all of the unicode code
75 	 * point validation is performed.
76 	 */
77 	return (c32rtomb(str, c32, ps));
78 }
79