1 /*- 2 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org> 3 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us> 4 * Copyright 2017 Nexenta Systems, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <wchar.h> 31 #include <locale.h> 32 #include <stdlib.h> 33 #include <time.h> 34 #include <errno.h> 35 36 #include <atf-c.h> 37 38 static int 39 cmp(const void *a, const void *b) 40 { 41 const wchar_t wa[2] = { *(const wchar_t *)a, 0 }; 42 const wchar_t wb[2] = { *(const wchar_t *)b, 0 }; 43 44 return (wcscoll(wa, wb)); 45 } 46 47 ATF_TC_WITHOUT_HEAD(russian_collation); 48 ATF_TC_BODY(russian_collation, tc) 49 { 50 wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё"; 51 wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ"; 52 53 ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL, 54 "Fail to set locale to \"ru_RU.UTF-8\""); 55 qsort(c, wcslen(c), sizeof(wchar_t), cmp); 56 ATF_CHECK_MSG(wcscmp(c, res) == 0, 57 "Bad collation, expected: '%ls' got '%ls'", res, c); 58 } 59 60 #define NSTRINGS 2000 61 #define MAXSTRLEN 20 62 #define MAXXFRMLEN (MAXSTRLEN * 20) 63 64 typedef struct { 65 char sval[MAXSTRLEN]; 66 char xval[MAXXFRMLEN]; 67 } cstr; 68 69 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm); 70 ATF_TC_BODY(strcoll_vs_strxfrm, tc) 71 { 72 cstr data[NSTRINGS]; 73 char *curloc; 74 int i, j; 75 76 curloc = setlocale(LC_ALL, "en_US.UTF-8"); 77 ATF_CHECK_MSG(curloc != NULL, "Fail to set locale"); 78 79 /* Ensure new random() values on every run */ 80 srandom((unsigned int) time(NULL)); 81 82 /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */ 83 for (i = 0; i < NSTRINGS; i++) { 84 char *p; 85 int len; 86 87 again: 88 p = data[i].sval; 89 len = 1 + (random() % (MAXSTRLEN - 1)); 90 while (len > 0) { 91 int c; 92 /* 93 * Generate random printable char in ISO8859-1 range. 94 * Bias towards producing a lot of spaces. 95 */ 96 97 if ((random() % 16) < 3) { 98 c = ' '; 99 } else { 100 do { 101 c = random() & 0xFF; 102 } while (!((c >= ' ' && c <= 127) || 103 (c >= 0xA0 && c <= 0xFF))); 104 } 105 106 if (c <= 127) { 107 *p++ = c; 108 len--; 109 } else { 110 if (len < 2) 111 break; 112 /* Poor man's utf8-ification */ 113 *p++ = 0xC0 + (c >> 6); 114 len--; 115 *p++ = 0x80 + (c & 0x3F); 116 len--; 117 } 118 } 119 *p = '\0'; 120 /* strxfrm() each string as we produce it */ 121 errno = 0; 122 ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval, 123 MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length " 124 " string exceeded %d bytes", (int)strlen(data[i].sval), 125 MAXXFRMLEN); 126 127 /* 128 * Amend strxfrm() failing on certain characters to be fixed and 129 * test later 130 */ 131 if (errno != 0) 132 goto again; 133 } 134 135 for (i = 0; i < NSTRINGS; i++) { 136 for (j = 0; j < NSTRINGS; j++) { 137 int sr = strcoll(data[i].sval, data[j].sval); 138 int sx = strcmp(data[i].xval, data[j].xval); 139 140 ATF_CHECK_MSG(!((sr * sx < 0) || 141 (sr * sx == 0 && sr + sx != 0)), 142 "%s: diff for \"%s\" and \"%s\"", 143 curloc, data[i].sval, data[j].sval); 144 } 145 } 146 } 147 148 ATF_TP_ADD_TCS(tp) 149 { 150 ATF_TP_ADD_TC(tp, russian_collation); 151 ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm); 152 153 return (atf_no_error()); 154 } 155