1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2020 Robert Mustacchi
14 */
15
16 /*
17 * C11 mbrtoc16(3C) support.
18 *
19 * The char16_t represents a UTF-16 encoding. This means that we have to deal
20 * with surrogate pairs.
21 */
22
23 #include <locale.h>
24 #include <wchar.h>
25 #include <xlocale.h>
26 #include <uchar.h>
27 #include "mblocal.h"
28 #include "unicode.h"
29
30 #include <sys/debug.h>
31
32 /*
33 * Ensure that we never cause our save state to ever exceed that of the
34 * mbstate_t. See the block comment in mblocal.h.
35 */
36 CTASSERT(sizeof (_CHAR16State) <= sizeof (mbstate_t));
37
38 static mbstate_t mbrtoc16_state;
39
40 size_t
mbrtoc16(char16_t * restrict pc16,const char * restrict str,size_t len,mbstate_t * restrict ps)41 mbrtoc16(char16_t *restrict pc16, const char *restrict str, size_t len,
42 mbstate_t *restrict ps)
43 {
44 wchar_t wc;
45 size_t ret;
46 char16_t out;
47 _CHAR16State *c16s;
48
49 if (ps == NULL) {
50 ps = &mbrtoc16_state;
51 }
52
53 if (str == NULL) {
54 pc16 = NULL;
55 str = "";
56 len = 1;
57 }
58
59 c16s = (_CHAR16State *)ps;
60 if (c16s->c16_surrogate != 0) {
61 if (pc16 != NULL) {
62 *pc16 = c16s->c16_surrogate;
63 }
64 c16s->c16_surrogate = 0;
65 return ((size_t)-3);
66 }
67
68 ret = mbrtowc_l(&wc, str, len, ps, uselocale(NULL));
69 if ((ssize_t)ret < 0) {
70 return (ret);
71 }
72
73 /*
74 * If this character is not in the basic multilingual plane then we need
75 * a surrogate character to represent it in UTF-16 and we will need to
76 * write that out on the next iteration.
77 */
78 if (wc >= UNICODE_SUP_START) {
79 wc -= UNICODE_SUP_START;
80 c16s->c16_surrogate = UNICODE_SUR_LOWER | UNICODE_SUR_LMASK(wc);
81 out = UNICODE_SUR_UPPER | UNICODE_SUR_UMASK(wc);
82 } else {
83 out = (char16_t)wc;
84 }
85
86 if (pc16 != NULL) {
87 *pc16 = out;
88 }
89
90 return (ret);
91 }
92