xref: /freebsd/lib/libc/tests/string/wcscoll_test.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
3  * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
4  * Copyright 2017 Nexenta Systems, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
#include <sys/cdefs.h>

#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>

#include <atf-c.h>
37 
/*
 * qsort(3) comparison callback for arrays of wchar_t.
 *
 * a and b each point at a single wide character.  The result is whatever
 * wcscoll(3) reports for the two characters taken as one-character strings.
 */
static int
cmp(const void *a, const void *b)
{
	wchar_t lhs[2], rhs[2];

	/* wcscoll() wants NUL-terminated strings, so wrap each character. */
	lhs[0] = *(const wchar_t *)a;
	lhs[1] = L'\0';
	rhs[0] = *(const wchar_t *)b;
	rhs[1] = L'\0';

	return (wcscoll(lhs, rhs));
}
46 
47 ATF_TC_WITHOUT_HEAD(russian_collation);
48 ATF_TC_BODY(russian_collation, tc)
49 {
50 	wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё";
51 	wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ";
52 
53 	ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL,
54 	    "Fail to set locale to \"ru_RU.UTF-8\"");
55 	qsort(c, wcslen(c), sizeof(wchar_t), cmp);
56 	ATF_CHECK_MSG(wcscmp(c, res) == 0,
57 	    "Bad collation, expected: '%ls' got '%ls'", res, c);
58 }
59 
/* Sizing of the random data set used by the strcoll_vs_strxfrm test. */
enum {
	NSTRINGS = 2000,		/* number of strings generated */
	MAXSTRLEN = 20,			/* size of each source string buffer */
	MAXXFRMLEN = MAXSTRLEN * 20	/* size of each strxfrm() buffer */
};

/* One test string together with its strxfrm() transformation. */
typedef struct {
	char	sval[MAXSTRLEN];	/* original string */
	char	xval[MAXXFRMLEN];	/* strxfrm() output for sval */
} cstr;
68 
69 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
70 ATF_TC_BODY(strcoll_vs_strxfrm, tc)
71 {
72 	cstr	data[NSTRINGS];
73 	char	*curloc;
74 	int	i, j;
75 
76 	curloc = setlocale(LC_ALL, "en_US.UTF-8");
77 	ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
78 
79 	/* Ensure new random() values on every run */
80 	srandom((unsigned int) time(NULL));
81 
82 	/* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
83 	for (i = 0; i < NSTRINGS; i++) {
84 		char	*p;
85 		int	len;
86 
87 again:
88 		p = data[i].sval;
89 		len = 1 + (random() % (MAXSTRLEN - 1));
90 		while (len > 0) {
91 			int c;
92 			/*
93 			 * Generate random printable char in ISO8859-1 range.
94 			 * Bias towards producing a lot of spaces.
95 			 */
96 
97 			if ((random() % 16) < 3) {
98 				c = ' ';
99 			} else {
100 				do {
101 					c = random() & 0xFF;
102 				} while (!((c >= ' ' && c <= 127) ||
103 				    (c >= 0xA0 && c <= 0xFF)));
104 			}
105 
106 			if (c <= 127) {
107 				*p++ = c;
108 				len--;
109 			} else {
110 				if (len < 2)
111 					break;
112 				/* Poor man's utf8-ification */
113 				*p++ = 0xC0 + (c >> 6);
114 				len--;
115 				*p++ = 0x80 + (c & 0x3F);
116 				len--;
117 			}
118 		}
119 		*p = '\0';
120 		/* strxfrm() each string as we produce it */
121 		errno = 0;
122 		ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
123 		    MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
124 		    " string exceeded %d bytes", (int)strlen(data[i].sval),
125 		    MAXXFRMLEN);
126 
127 		/*
128 		 * Amend strxfrm() failing on certain characters to be fixed and
129 		 * test later
130 		 */
131 		if (errno != 0)
132 			goto again;
133 	}
134 
135 	for (i = 0; i < NSTRINGS; i++) {
136 		for (j = 0; j < NSTRINGS; j++) {
137 			int sr = strcoll(data[i].sval, data[j].sval);
138 			int sx = strcmp(data[i].xval, data[j].xval);
139 
140 			ATF_CHECK_MSG(!((sr * sx < 0) ||
141 			    (sr * sx == 0 && sr + sx != 0)),
142 			    "%s: diff for \"%s\" and \"%s\"",
143 			    curloc, data[i].sval, data[j].sval);
144 		}
145 	}
146 }
147 
148 ATF_TP_ADD_TCS(tp)
149 {
150 	ATF_TP_ADD_TC(tp, russian_collation);
151 	ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
152 
153 	return (atf_no_error());
154 }
155