xref: /freebsd/lib/libc/tests/locale/mbrtoc16_test.c (revision 525fe93dc7487a1e63a90f6a2b956abc601963c1)
/*-
 * Copyright (c) 2002 Tim J. Robbins
 * All rights reserved.
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
 */
32 
33 #include <errno.h>
34 #include <limits.h>
35 #include <locale.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <uchar.h>
39 
40 #include <atf-c.h>
41 
/*
 * Select `locale_name` for LC_CTYPE on behalf of the running test case.
 *
 * The test case fails if setlocale() returns NULL, or if the name it
 * returns differs from the name that was requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set != NULL) {
		/* The locale was accepted; insist that it is exactly the one asked for. */
		ATF_REQUIRE(strcmp(lc_ctype_set, locale_name) == 0);
		return;
	}

	/* atf_tc_fail() is expected not to return to the caller. */
	atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
	    locale_name, errno);
}
54 
/* Explicit conversion state; the test cases zero it before each scenario. */
static mbstate_t s;
/* Destination for the code unit written by mbrtoc16() in the test cases. */
static char16_t c16;
57 
58 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
59 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
60 {
61 
62 	require_lc_ctype("C");
63 
64 	/* Null wide character, internal state. */
65 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
66 	ATF_REQUIRE(c16 == 0);
67 
68 	/* Null wide character. */
69 	memset(&s, 0, sizeof(s));
70 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
71 	ATF_REQUIRE(c16 == 0);
72 
73 	/* Latin letter A, internal state. */
74 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
75 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
76 	ATF_REQUIRE(c16 == L'A');
77 
78 	/* Latin letter A. */
79 	memset(&s, 0, sizeof(s));
80 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
81 	ATF_REQUIRE(c16 == L'A');
82 
83 	/* Incomplete character sequence. */
84 	c16 = L'z';
85 	memset(&s, 0, sizeof(s));
86 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
87 	ATF_REQUIRE(c16 == L'z');
88 
89 	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
90 	c16 = L'z';
91 	memset(&s, 0, sizeof(s));
92 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
93 	ATF_REQUIRE(c16 == L'z');
94 
95 	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
96 	memset(&s, 0, sizeof(s));
97 	ATF_REQUIRE(mbrtoc16(&c16, "AB", 2, &s) == 1);
98 	ATF_REQUIRE(c16 == L'A');
99 	ATF_REQUIRE(mbrtoc16(&c16, "C", 1, &s) == 1);
100 	ATF_REQUIRE(c16 == L'C');
101 
102 }
103 
104 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
105 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
106 {
107 
108 	require_lc_ctype("en_US.ISO8859-1");
109 
110 	/* Currency sign. */
111 	memset(&s, 0, sizeof(s));
112 	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
113 	ATF_REQUIRE(c16 == 0xa4);
114 }
115 
116 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
117 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
118 {
119 
120 	require_lc_ctype("en_US.ISO8859-15");
121 
122 	/* Euro sign. */
123 	memset(&s, 0, sizeof(s));
124 	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
125 	ATF_REQUIRE(c16 == 0x20ac);
126 }
127 
128 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
129 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
130 {
131 
132 	require_lc_ctype("en_US.UTF-8");
133 
134 	/* Null wide character, internal state. */
135 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
136 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
137 	ATF_REQUIRE(c16 == 0);
138 
139 	/* Null wide character. */
140 	memset(&s, 0, sizeof(s));
141 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
142 	ATF_REQUIRE(c16 == 0);
143 
144 	/* Latin letter A, internal state. */
145 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
146 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
147 	ATF_REQUIRE(c16 == L'A');
148 
149 	/* Latin letter A. */
150 	memset(&s, 0, sizeof(s));
151 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
152 	ATF_REQUIRE(c16 == L'A');
153 
154 	/* Incomplete character sequence (zero length). */
155 	c16 = L'z';
156 	memset(&s, 0, sizeof(s));
157 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
158 	ATF_REQUIRE(c16 == L'z');
159 
160 	/* Incomplete character sequence (truncated double-byte). */
161 	memset(&s, 0, sizeof(s));
162 	c16 = 0;
163 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
164 
165 	/* Same as above, but complete. */
166 	memset(&s, 0, sizeof(s));
167 	c16 = 0;
168 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\x84", 2, &s) == 2);
169 	ATF_REQUIRE(c16 == 0xc4);
170 
171 	/* Test restarting behaviour. */
172 	memset(&s, 0, sizeof(s));
173 	c16 = 0;
174 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
175 	ATF_REQUIRE(c16 == 0);
176 	ATF_REQUIRE(mbrtoc16(&c16, "\xb7", 1, &s) == 1);
177 	ATF_REQUIRE(c16 == 0xf7);
178 
179 	/* Surrogate pair. */
180 	memset(&s, 0, sizeof(s));
181 	c16 = 0;
182 	ATF_REQUIRE(mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s) == 4);
183 	ATF_REQUIRE(c16 == 0xd83d);
184 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
185 	ATF_REQUIRE(c16 == 0xdca9);
186 
187 	/* Letter e with acute, precomposed. */
188 	memset(&s, 0, sizeof(s));
189 	c16 = 0;
190 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2);
191 	ATF_REQUIRE(c16 == 0xe9);
192 
193 	/* Letter e with acute, combined. */
194 	memset(&s, 0, sizeof(s));
195 	c16 = 0;
196 	ATF_REQUIRE(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1);
197 	ATF_REQUIRE(c16 == 0x65);
198 	ATF_REQUIRE(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2);
199 	ATF_REQUIRE(c16 == 0x301);
200 }
201 
202 ATF_TP_ADD_TCS(tp)
203 {
204 
205 	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
206 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
207 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
208 	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
209 
210 	return (atf_no_error());
211 }
212