xref: /freebsd/lib/libc/tests/locale/mbrtoc16_test.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * Copyright (c) 2002 Tim J. Robbins
3  * All rights reserved.
4  *
5  * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
31  */
32 
33 #include <sys/cdefs.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <uchar.h>
40 
41 #include <atf-c.h>
42 
/*
 * Select locale_name as the LC_CTYPE locale for the running test case.
 *
 * The test case is failed if setlocale() returns NULL, and also if the
 * locale name it hands back is not exactly the one that was requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *selected = setlocale(LC_CTYPE, locale_name);

	if (selected == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The reported locale must be the requested one. */
	ATF_REQUIRE(strcmp(selected, locale_name) == 0);
}
55 
/*
 * Shared scratch objects for the test cases in this file: c16 receives the
 * converted UTF-16 code unit, s is the caller-supplied conversion state.
 * Both have static storage duration and therefore start out zeroed.
 */
static char16_t c16;
static mbstate_t s;
58 
59 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
60 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
61 {
62 
63 	require_lc_ctype("C");
64 
65 	/* Null wide character, internal state. */
66 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
67 	ATF_REQUIRE(c16 == 0);
68 
69 	/* Null wide character. */
70 	memset(&s, 0, sizeof(s));
71 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
72 	ATF_REQUIRE(c16 == 0);
73 
74 	/* Latin letter A, internal state. */
75 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
76 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
77 	ATF_REQUIRE(c16 == L'A');
78 
79 	/* Latin letter A. */
80 	memset(&s, 0, sizeof(s));
81 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
82 	ATF_REQUIRE(c16 == L'A');
83 
84 	/* Incomplete character sequence. */
85 	c16 = L'z';
86 	memset(&s, 0, sizeof(s));
87 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
88 	ATF_REQUIRE(c16 == L'z');
89 
90 	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
91 	c16 = L'z';
92 	memset(&s, 0, sizeof(s));
93 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
94 	ATF_REQUIRE(c16 == L'z');
95 
96 	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
97 	memset(&s, 0, sizeof(s));
98 	ATF_REQUIRE(mbrtoc16(&c16, "AB", 2, &s) == 1);
99 	ATF_REQUIRE(c16 == L'A');
100 	ATF_REQUIRE(mbrtoc16(&c16, "C", 1, &s) == 1);
101 	ATF_REQUIRE(c16 == L'C');
102 
103 }
104 
105 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
106 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
107 {
108 
109 	require_lc_ctype("en_US.ISO8859-1");
110 
111 	/* Currency sign. */
112 	memset(&s, 0, sizeof(s));
113 	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
114 	ATF_REQUIRE(c16 == 0xa4);
115 }
116 
117 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
118 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
119 {
120 
121 	require_lc_ctype("en_US.ISO8859-15");
122 
123 	/* Euro sign. */
124 	memset(&s, 0, sizeof(s));
125 	ATF_REQUIRE(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
126 	ATF_REQUIRE(c16 == 0x20ac);
127 }
128 
129 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
130 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
131 {
132 
133 	require_lc_ctype("en_US.UTF-8");
134 
135 	/* Null wide character, internal state. */
136 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
137 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, NULL) == 0);
138 	ATF_REQUIRE(c16 == 0);
139 
140 	/* Null wide character. */
141 	memset(&s, 0, sizeof(s));
142 	ATF_REQUIRE(mbrtoc16(&c16, "", 1, &s) == 0);
143 	ATF_REQUIRE(c16 == 0);
144 
145 	/* Latin letter A, internal state. */
146 	ATF_REQUIRE(mbrtoc16(NULL, 0, 0, NULL) == 0);
147 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, NULL) == 1);
148 	ATF_REQUIRE(c16 == L'A');
149 
150 	/* Latin letter A. */
151 	memset(&s, 0, sizeof(s));
152 	ATF_REQUIRE(mbrtoc16(&c16, "A", 1, &s) == 1);
153 	ATF_REQUIRE(c16 == L'A');
154 
155 	/* Incomplete character sequence (zero length). */
156 	c16 = L'z';
157 	memset(&s, 0, sizeof(s));
158 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
159 	ATF_REQUIRE(c16 == L'z');
160 
161 	/* Incomplete character sequence (truncated double-byte). */
162 	memset(&s, 0, sizeof(s));
163 	c16 = 0;
164 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
165 
166 	/* Same as above, but complete. */
167 	memset(&s, 0, sizeof(s));
168 	c16 = 0;
169 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\x84", 2, &s) == 2);
170 	ATF_REQUIRE(c16 == 0xc4);
171 
172 	/* Test restarting behaviour. */
173 	memset(&s, 0, sizeof(s));
174 	c16 = 0;
175 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
176 	ATF_REQUIRE(c16 == 0);
177 	ATF_REQUIRE(mbrtoc16(&c16, "\xb7", 1, &s) == 1);
178 	ATF_REQUIRE(c16 == 0xf7);
179 
180 	/* Surrogate pair. */
181 	memset(&s, 0, sizeof(s));
182 	c16 = 0;
183 	ATF_REQUIRE(mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s) == 4);
184 	ATF_REQUIRE(c16 == 0xd83d);
185 	ATF_REQUIRE(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
186 	ATF_REQUIRE(c16 == 0xdca9);
187 
188 	/* Letter e with acute, precomposed. */
189 	memset(&s, 0, sizeof(s));
190 	c16 = 0;
191 	ATF_REQUIRE(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2);
192 	ATF_REQUIRE(c16 == 0xe9);
193 
194 	/* Letter e with acute, combined. */
195 	memset(&s, 0, sizeof(s));
196 	c16 = 0;
197 	ATF_REQUIRE(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1);
198 	ATF_REQUIRE(c16 == 0x65);
199 	ATF_REQUIRE(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2);
200 	ATF_REQUIRE(c16 == 0x301);
201 }
202 
203 ATF_TP_ADD_TCS(tp)
204 {
205 
206 	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
207 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
208 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
209 	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
210 
211 	return (atf_no_error());
212 }
213