1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 32 #include <ctype.h> 33 #include <stdlib.h> 34 #include <string.h> 35 36 #include "sort.h" 37 #include "vsort.h" 38 39 static inline bool 40 isdigit_clocale(wchar_t c) 41 { 42 43 return (c >= L'0' && c <= L'9'); 44 } 45 46 static inline bool 47 isalpha_clocale(wchar_t c) 48 { 49 50 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 51 } 52 53 static inline bool 54 isalnum_clocale(wchar_t c) 55 { 56 57 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 58 (c >= L'0' && c <= L'9')); 59 } 60 61 /* 62 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 63 * Set length of string before suffix. 64 */ 65 static void 66 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 67 { 68 wchar_t c; 69 size_t clen; 70 bool expect_alpha, sfx; 71 72 sfx = false; 73 expect_alpha = false; 74 *len = 0; 75 clen = 0; 76 77 while ((si < se) && (c = bws_get_iter_value(si))) { 78 if (expect_alpha) { 79 expect_alpha = false; 80 if (!isalpha_clocale(c) && (c != L'~')) 81 sfx = false; 82 } else if (c == L'.') { 83 expect_alpha = true; 84 if (!sfx) { 85 sfx = true; 86 *len = clen; 87 } 88 } else if (!isalnum_clocale(c) && (c != L'~')) 89 sfx = false; 90 91 si = bws_iterator_inc(si, 1); 92 ++clen; 93 } 94 95 /* This code must be here to make the implementation compatible 96 * with WORDING of GNU sort documentation. 97 * But the GNU sort implementation is not following its own 98 * documentation. GNU sort allows empty file extensions 99 * (just dot with nothing after); but the regular expression in 100 * their documentation does not allow empty file extensions. 101 * We chose to make our implementation compatible with GNU sort 102 * implementation. If they will ever fix their bug, this code 103 * must be uncommented. Or they may choose to fix the info page, 104 * then the code stays commented. 105 * 106 if (expect_alpha) 107 sfx = false; 108 */ 109 110 if (!sfx) 111 *len = clen; 112 } 113 114 static inline int 115 cmp_chars(wchar_t c1, wchar_t c2) 116 { 117 118 if (c1 == c2) 119 return (0); 120 121 if (c1 == L'~') 122 return (-1); 123 if (c2 == L'~') 124 return (+1); 125 126 if (isdigit_clocale(c1) || !c1) 127 return ((isdigit_clocale(c2) || !c2) ? 0 : -1); 128 129 if (isdigit_clocale(c2) || !c2) 130 return (+1); 131 132 if (isalpha_clocale(c1)) 133 return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); 134 135 if (isalpha_clocale(c2)) 136 return (+1); 137 138 return ((int) c1 - (int) c2); 139 } 140 141 static int 142 cmpversions(bwstring_iterator si1, bwstring_iterator se1, 143 bwstring_iterator si2, bwstring_iterator se2) 144 { 145 int cmp, diff; 146 147 while ((si1 < se1) || (si2 < se2)) { 148 diff = 0; 149 150 while (((si1 < se1) && 151 !isdigit_clocale(bws_get_iter_value(si1))) || 152 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 153 wchar_t c1, c2; 154 155 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 156 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 157 158 cmp = cmp_chars(c1, c2); 159 if (cmp) 160 return (cmp); 161 162 if (si1 < se1) 163 si1 = bws_iterator_inc(si1, 1); 164 if (si2 < se2) 165 si2 = bws_iterator_inc(si2, 1); 166 } 167 168 while (bws_get_iter_value(si1) == L'0') 169 si1 = bws_iterator_inc(si1, 1); 170 171 while (bws_get_iter_value(si2) == L'0') 172 si2 = bws_iterator_inc(si2, 1); 173 174 while (isdigit_clocale(bws_get_iter_value(si1)) && 175 isdigit_clocale(bws_get_iter_value(si2))) { 176 if (!diff) 177 diff = ((int)bws_get_iter_value(si1) - 178 (int)bws_get_iter_value(si2)); 179 si1 = bws_iterator_inc(si1, 1); 180 si2 = bws_iterator_inc(si2, 1); 181 } 182 183 if (isdigit_clocale(bws_get_iter_value(si1))) 184 return (1); 185 186 if (isdigit_clocale(bws_get_iter_value(si2))) 187 return (-1); 188 189 if (diff) 190 return (diff); 191 } 192 193 return (0); 194 } 195 196 /* 197 * Compare two version strings 198 */ 199 int 200 vcmp(struct bwstring *s1, struct bwstring *s2) 201 { 202 bwstring_iterator si1, si2; 203 wchar_t c1, c2; 204 size_t len1, len2, slen1, slen2; 205 int cmp_bytes, cmp_res; 206 207 if (s1 == s2) 208 return (0); 209 210 cmp_bytes = bwscmp(s1, s2, 0); 211 if (cmp_bytes == 0) 212 return (0); 213 214 len1 = slen1 = BWSLEN(s1); 215 len2 = slen2 = BWSLEN(s2); 216 217 if (slen1 < 1) 218 return (-1); 219 if (slen2 < 1) 220 return (+1); 221 222 si1 = bws_begin(s1); 223 si2 = bws_begin(s2); 224 225 c1 = bws_get_iter_value(si1); 226 c2 = bws_get_iter_value(si2); 227 228 if (c1 == L'.' && (slen1 == 1)) 229 return (-1); 230 231 if (c2 == L'.' && (slen2 == 1)) 232 return (+1); 233 234 if (slen1 == 2 && c1 == L'.' && 235 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 236 return (-1); 237 if (slen2 == 2 && c2 == L'.' && 238 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 239 return (+1); 240 241 if (c1 == L'.' && c2 != L'.') 242 return (-1); 243 if (c1 != L'.' && c2 == L'.') 244 return (+1); 245 246 if (c1 == L'.' && c2 == L'.') { 247 si1 = bws_iterator_inc(si1, 1); 248 si2 = bws_iterator_inc(si2, 1); 249 } 250 251 find_suffix(si1, bws_end(s1), &len1); 252 find_suffix(si2, bws_end(s2), &len2); 253 254 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 255 return (cmp_bytes); 256 257 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 258 bws_iterator_inc(si2, len2)); 259 260 if (cmp_res == 0) 261 cmp_res = cmp_bytes; 262 263 return (cmp_res); 264 } 265