1 /*- 2 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org> 3 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us> 4 * Copyright 2017 Nexenta Systems, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <wchar.h> 30 #include <locale.h> 31 #include <stdlib.h> 32 #include <time.h> 33 #include <errno.h> 34 35 #include <atf-c.h> 36 37 static int 38 cmp(const void *a, const void *b) 39 { 40 const wchar_t wa[2] = { *(const wchar_t *)a, 0 }; 41 const wchar_t wb[2] = { *(const wchar_t *)b, 0 }; 42 43 return (wcscoll(wa, wb)); 44 } 45 46 ATF_TC_WITHOUT_HEAD(russian_collation); 47 ATF_TC_BODY(russian_collation, tc) 48 { 49 wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё"; 50 wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ"; 51 52 ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL, 53 "Fail to set locale to \"ru_RU.UTF-8\""); 54 qsort(c, wcslen(c), sizeof(wchar_t), cmp); 55 ATF_CHECK_MSG(wcscmp(c, res) == 0, 56 "Bad collation, expected: '%ls' got '%ls'", res, c); 57 } 58 59 #define NSTRINGS 2000 60 #define MAXSTRLEN 20 61 #define MAXXFRMLEN (MAXSTRLEN * 20) 62 63 typedef struct { 64 char sval[MAXSTRLEN]; 65 char xval[MAXXFRMLEN]; 66 } cstr; 67 68 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm); 69 ATF_TC_BODY(strcoll_vs_strxfrm, tc) 70 { 71 cstr data[NSTRINGS]; 72 char *curloc; 73 int i, j; 74 75 curloc = setlocale(LC_ALL, "en_US.UTF-8"); 76 ATF_CHECK_MSG(curloc != NULL, "Fail to set locale"); 77 78 /* Ensure new random() values on every run */ 79 srandom((unsigned int) time(NULL)); 80 81 /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */ 82 for (i = 0; i < NSTRINGS; i++) { 83 char *p; 84 int len; 85 86 again: 87 p = data[i].sval; 88 len = 1 + (random() % (MAXSTRLEN - 1)); 89 while (len > 0) { 90 int c; 91 /* 92 * Generate random printable char in ISO8859-1 range. 93 * Bias towards producing a lot of spaces. 94 */ 95 96 if ((random() % 16) < 3) { 97 c = ' '; 98 } else { 99 do { 100 c = random() & 0xFF; 101 } while (!((c >= ' ' && c <= 127) || 102 (c >= 0xA0 && c <= 0xFF))); 103 } 104 105 if (c <= 127) { 106 *p++ = c; 107 len--; 108 } else { 109 if (len < 2) 110 break; 111 /* Poor man's utf8-ification */ 112 *p++ = 0xC0 + (c >> 6); 113 len--; 114 *p++ = 0x80 + (c & 0x3F); 115 len--; 116 } 117 } 118 *p = '\0'; 119 /* strxfrm() each string as we produce it */ 120 errno = 0; 121 ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval, 122 MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length " 123 " string exceeded %d bytes", (int)strlen(data[i].sval), 124 MAXXFRMLEN); 125 126 /* 127 * Amend strxfrm() failing on certain characters to be fixed and 128 * test later 129 */ 130 if (errno != 0) 131 goto again; 132 } 133 134 for (i = 0; i < NSTRINGS; i++) { 135 for (j = 0; j < NSTRINGS; j++) { 136 int sr = strcoll(data[i].sval, data[j].sval); 137 int sx = strcmp(data[i].xval, data[j].xval); 138 139 ATF_CHECK_MSG(!((sr * sx < 0) || 140 (sr * sx == 0 && sr + sx != 0)), 141 "%s: diff for \"%s\" and \"%s\"", 142 curloc, data[i].sval, data[j].sval); 143 } 144 } 145 } 146 147 ATF_TP_ADD_TCS(tp) 148 { 149 ATF_TP_ADD_TC(tp, russian_collation); 150 ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm); 151 152 return (atf_no_error()); 153 } 154