xref: /freebsd/lib/libc/tests/locale/mbrtoc16_test.c (revision 448897d366c218f9fd6208427eef1e6dd51f78d0)
1 /*-
2  * Copyright (c) 2002 Tim J. Robbins
3  * All rights reserved.
4  *
5  * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <errno.h>
37 #include <limits.h>
38 #include <locale.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <uchar.h>
42 
43 #include <atf-c.h>
44 
/*
 * Select locale_name for the LC_CTYPE category on behalf of a test case.
 *
 * The calling test case is failed if setlocale() returns NULL, and the
 * name setlocale() returns is required to be exactly locale_name.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set;

	/*
	 * setlocale() is not required to set errno when it fails, so clear
	 * it first.  Otherwise the failure message below could report a
	 * stale value left behind by an unrelated earlier call.
	 */
	errno = 0;
	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
	if (lc_ctype_set == NULL)
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);

	/* The locale must have been applied under the name we asked for. */
	ATF_REQUIRE(strcmp(lc_ctype_set, locale_name) == 0);
}
57 
/*
 * Scratch objects shared by the test cases below: `s' is the explicit
 * conversion state passed to mbrtoc16(), and `c16' receives the converted
 * code unit.  Test cases zero `s' with memset() at the start of each
 * sequence of calls that passes it explicitly.
 */
static mbstate_t s;
static char16_t c16;
60 
61 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
62 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
63 {
64 
65 	require_lc_ctype("C");
66 
67 	/* Null wide character, internal state. */
68 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
69 	ATF_REQUIRE(c16 == 0);
70 
71 	/* Null wide character. */
72 	memset(&s, 0, sizeof(s));
73 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
74 	ATF_REQUIRE(c16 == 0);
75 
76 	/* Latin letter A, internal state. */
77 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
78 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
79 	ATF_REQUIRE(c16 == L'A');
80 
81 	/* Latin letter A. */
82 	memset(&s, 0, sizeof(s));
83 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
84 	ATF_REQUIRE(c16 == L'A');
85 
86 	/* Incomplete character sequence. */
87 	c16 = L'z';
88 	memset(&s, 0, sizeof(s));
89 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
90 	ATF_REQUIRE(c16 == L'z');
91 
92 	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
93 	c16 = L'z';
94 	memset(&s, 0, sizeof(s));
95 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
96 	ATF_REQUIRE(c16 == L'z');
97 
98 	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
99 	memset(&s, 0, sizeof(s));
100 	ATF_REQUIRE(mbrtoc16(&c16, "AB", 2, &s) == 1);
101 	ATF_REQUIRE(c16 == L'A');
102 	ATF_REQUIRE(mbrtoc16(&c16, "C", 1, &s) == 1);
103 	ATF_REQUIRE(c16 == L'C');
104 
105 }
106 
/*
 * Check a single-byte conversion in the en_US.ISO8859-1 locale: byte 0xa4
 * must come back as the code unit 0x00a4.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
{

	require_lc_ctype("en_US.ISO8859-1");

	/* Currency sign: one input byte consumed, value unchanged. */
	memset(&s, 0, sizeof(s));
	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
	ATF_REQUIRE(c16 == 0xa4);
}
118 
/*
 * Check a single-byte conversion in the en_US.ISO8859-15 locale: the same
 * byte 0xa4 used by the ISO 8859-1 test must here come back as 0x20ac, so
 * the result cannot be a plain copy of the input byte.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
{

	require_lc_ctype("en_US.ISO8859-15");

	/* Euro sign: one input byte consumed, converted to U+20AC. */
	memset(&s, 0, sizeof(s));
	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
	ATF_REQUIRE(c16 == 0x20ac);
}
130 
/*
 * Exercise mbrtoc16() in the en_US.UTF-8 locale: NUL and ASCII input,
 * zero-length and truncated input, restarting a conversion across two
 * calls, a character that needs a surrogate pair, and precomposed versus
 * combining-character input.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
{

	require_lc_ctype("en_US.UTF-8");

	/*
	 * Null character, internal state.  The first call (null input
	 * pointer) is made before relying on the internal state.
	 */
	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
	ATF_REQUIRE(c16 == 0);

	/* Null character, explicit state. */
	memset(&s, 0, sizeof(s));
	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
	ATF_REQUIRE(c16 == 0);

	/* Latin letter A, internal state. */
	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
	ATF_REQUIRE(c16 == L'A');

	/* Latin letter A, explicit state. */
	memset(&s, 0, sizeof(s));
	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
	ATF_REQUIRE(c16 == L'A');

	/*
	 * Incomplete character sequence (zero length): (size_t)-2 is
	 * expected and the sentinel stored in c16 must survive.
	 */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
	ATF_REQUIRE(c16 == L'z');

	/*
	 * Incomplete character sequence (truncated double-byte): only the
	 * lead byte 0xc3 is supplied.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);

	/* Same as above, but complete: 0xc3 0x84 is U+00C4. */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\x84", 2, &s) == 2);
	ATF_REQUIRE(c16 == 0xc4);

	/*
	 * Test restarting behaviour: the lead byte alone returns
	 * (size_t)-2 without storing anything, and supplying the trail
	 * byte in a second call with the same state completes U+00F7,
	 * reporting only the one byte consumed by that call.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
	ATF_REQUIRE(c16 == 0);
	ATF_REQUIRE(mbrtoc16(&c16, "\xb7", 1, &s) == 1);
	ATF_REQUIRE(c16 == 0xf7);

	/*
	 * Surrogate pair: the four-byte sequence encodes U+1F4A9.  The
	 * first call consumes all four bytes and stores the high surrogate
	 * 0xd83d; the second call consumes no input, returns (size_t)-3
	 * and stores the low surrogate 0xdca9.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s) == 4);
	ATF_REQUIRE(c16 == 0xd83d);
	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
	ATF_REQUIRE(c16 == 0xdca9);

	/* Letter e with acute, precomposed: 0xc3 0xa9 is U+00E9. */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2);
	ATF_REQUIRE(c16 == 0xe9);

	/*
	 * Letter e with acute, combined: the base letter and the combining
	 * accent are returned as separate code units.  The first call is
	 * offered all three bytes but consumes only the 'e' (0x65); the
	 * accent bytes are then passed on their own and yield U+0301.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_REQUIRE(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1);
	ATF_REQUIRE(c16 == 0x65);
	ATF_REQUIRE(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2);
	ATF_REQUIRE(c16 == 0x301);
}
204 
/*
 * Test program entry: add each mbrtoc16() test case defined in this file
 * to the test program `tp'.
 */
ATF_TP_ADD_TCS(tp)
{

	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);

	return (atf_no_error());
}
215