1 /*-
2 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
3 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
4 * Copyright 2017 Nexenta Systems, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
34
35 #include <atf-c.h>
36
/*
 * qsort(3) comparator for arrays of wchar_t.
 *
 * Each element is copied into its own NUL-terminated one-character wide
 * string so that the pair can be ordered with wcscoll(3), i.e. according
 * to the LC_COLLATE category of the current locale.
 *
 * Returns the wcscoll() result: negative, zero or positive when *a
 * collates before, equal to, or after *b.
 */
static int
cmp(const void *a, const void *b)
{
	const wchar_t *lhs = a;
	const wchar_t *rhs = b;
	wchar_t left[2];
	wchar_t right[2];

	left[0] = *lhs;
	left[1] = L'\0';
	right[0] = *rhs;
	right[1] = L'\0';

	return (wcscoll(left, right));
}
45
46 ATF_TC_WITHOUT_HEAD(russian_collation);
ATF_TC_BODY(russian_collation,tc)47 ATF_TC_BODY(russian_collation, tc)
48 {
49 wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё";
50 wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ";
51
52 ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL,
53 "Fail to set locale to \"ru_RU.UTF-8\"");
54 qsort(c, wcslen(c), sizeof(wchar_t), cmp);
55 ATF_CHECK_MSG(wcscmp(c, res) == 0,
56 "Bad collation, expected: '%ls' got '%ls'", res, c);
57 }
58
/* Number of random strings generated by the strcoll_vs_strxfrm test. */
#define	NSTRINGS	2000
/* Size in bytes of the buffer holding one random string (NUL included). */
#define	MAXSTRLEN	20
/* Size in bytes of the buffer receiving the strxfrm() form of a string. */
#define	MAXXFRMLEN	(20 * MAXSTRLEN)

/* A random string together with its strxfrm()-transformed counterpart. */
typedef struct {
	char	sval[MAXSTRLEN];	/* the string itself */
	char	xval[MAXXFRMLEN];	/* strxfrm() output for sval */
} cstr;
67
68 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
ATF_TC_BODY(strcoll_vs_strxfrm,tc)69 ATF_TC_BODY(strcoll_vs_strxfrm, tc)
70 {
71 cstr data[NSTRINGS];
72 char *curloc;
73 int i, j;
74
75 curloc = setlocale(LC_ALL, "en_US.UTF-8");
76 ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
77
78 /* Ensure new random() values on every run */
79 srandom((unsigned int) time(NULL));
80
81 /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
82 for (i = 0; i < NSTRINGS; i++) {
83 char *p;
84 int len;
85
86 again:
87 p = data[i].sval;
88 len = 1 + (random() % (MAXSTRLEN - 1));
89 while (len > 0) {
90 int c;
91 /*
92 * Generate random printable char in ISO8859-1 range.
93 * Bias towards producing a lot of spaces.
94 */
95
96 if ((random() % 16) < 3) {
97 c = ' ';
98 } else {
99 do {
100 c = random() & 0xFF;
101 } while (!((c >= ' ' && c <= 127) ||
102 (c >= 0xA0 && c <= 0xFF)));
103 }
104
105 if (c <= 127) {
106 *p++ = c;
107 len--;
108 } else {
109 if (len < 2)
110 break;
111 /* Poor man's utf8-ification */
112 *p++ = 0xC0 + (c >> 6);
113 len--;
114 *p++ = 0x80 + (c & 0x3F);
115 len--;
116 }
117 }
118 *p = '\0';
119 /* strxfrm() each string as we produce it */
120 errno = 0;
121 ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
122 MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
123 " string exceeded %d bytes", (int)strlen(data[i].sval),
124 MAXXFRMLEN);
125
126 /*
127 * Amend strxfrm() failing on certain characters to be fixed and
128 * test later
129 */
130 if (errno != 0)
131 goto again;
132 }
133
134 for (i = 0; i < NSTRINGS; i++) {
135 for (j = 0; j < NSTRINGS; j++) {
136 int sr = strcoll(data[i].sval, data[j].sval);
137 int sx = strcmp(data[i].xval, data[j].xval);
138
139 ATF_CHECK_MSG(!((sr * sx < 0) ||
140 (sr * sx == 0 && sr + sx != 0)),
141 "%s: diff for \"%s\" and \"%s\"",
142 curloc, data[i].sval, data[j].sval);
143 }
144 }
145 }
146
ATF_TP_ADD_TCS(tp)147 ATF_TP_ADD_TCS(tp)
148 {
149 ATF_TP_ADD_TC(tp, russian_collation);
150 ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
151
152 return (atf_no_error());
153 }
154