1 /*- 2 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org> 3 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us> 4 * Copyright 2017 Nexenta Systems, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <wchar.h> 33 #include <locale.h> 34 #include <stdlib.h> 35 #include <time.h> 36 #include <errno.h> 37 38 #include <atf-c.h> 39 40 static int 41 cmp(const void *a, const void *b) 42 { 43 const wchar_t wa[2] = { *(const wchar_t *)a, 0 }; 44 const wchar_t wb[2] = { *(const wchar_t *)b, 0 }; 45 46 return (wcscoll(wa, wb)); 47 } 48 49 ATF_TC_WITHOUT_HEAD(russian_collation); 50 ATF_TC_BODY(russian_collation, tc) 51 { 52 wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё"; 53 wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ"; 54 55 ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL, 56 "Fail to set locale to \"ru_RU.UTF-8\""); 57 qsort(c, wcslen(c), sizeof(wchar_t), cmp); 58 ATF_CHECK_MSG(wcscmp(c, res) == 0, 59 "Bad collation, expected: '%ls' got '%ls'", res, c); 60 } 61 62 #define NSTRINGS 2000 63 #define MAXSTRLEN 20 64 #define MAXXFRMLEN (MAXSTRLEN * 20) 65 66 typedef struct { 67 char sval[MAXSTRLEN]; 68 char xval[MAXXFRMLEN]; 69 } cstr; 70 71 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm); 72 ATF_TC_BODY(strcoll_vs_strxfrm, tc) 73 { 74 cstr data[NSTRINGS]; 75 char *curloc; 76 int i, j; 77 78 curloc = setlocale(LC_ALL, "en_US.UTF-8"); 79 ATF_CHECK_MSG(curloc != NULL, "Fail to set locale"); 80 81 /* Ensure new random() values on every run */ 82 srandom((unsigned int) time(NULL)); 83 84 /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */ 85 for (i = 0; i < NSTRINGS; i++) { 86 char *p; 87 int len; 88 89 again: 90 p = data[i].sval; 91 len = 1 + (random() % (MAXSTRLEN - 1)); 92 while (len > 0) { 93 int c; 94 /* 95 * Generate random printable char in ISO8859-1 range. 96 * Bias towards producing a lot of spaces. 97 */ 98 99 if ((random() % 16) < 3) { 100 c = ' '; 101 } else { 102 do { 103 c = random() & 0xFF; 104 } while (!((c >= ' ' && c <= 127) || 105 (c >= 0xA0 && c <= 0xFF))); 106 } 107 108 if (c <= 127) { 109 *p++ = c; 110 len--; 111 } else { 112 if (len < 2) 113 break; 114 /* Poor man's utf8-ification */ 115 *p++ = 0xC0 + (c >> 6); 116 len--; 117 *p++ = 0x80 + (c & 0x3F); 118 len--; 119 } 120 } 121 *p = '\0'; 122 /* strxfrm() each string as we produce it */ 123 errno = 0; 124 ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval, 125 MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length " 126 " string exceeded %d bytes", (int)strlen(data[i].sval), 127 MAXXFRMLEN); 128 129 /* 130 * Amend strxfrm() failing on certain characters to be fixed and 131 * test later 132 */ 133 if (errno != 0) 134 goto again; 135 } 136 137 for (i = 0; i < NSTRINGS; i++) { 138 for (j = 0; j < NSTRINGS; j++) { 139 int sr = strcoll(data[i].sval, data[j].sval); 140 int sx = strcmp(data[i].xval, data[j].xval); 141 142 ATF_CHECK_MSG(!((sr * sx < 0) || 143 (sr * sx == 0 && sr + sx != 0)), 144 "%s: diff for \"%s\" and \"%s\"", 145 curloc, data[i].sval, data[j].sval); 146 } 147 } 148 } 149 150 ATF_TP_ADD_TCS(tp) 151 { 152 ATF_TP_ADD_TC(tp, russian_collation); 153 ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm); 154 155 return (atf_no_error()); 156 } 157