1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 35 #include <ctype.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 #include "sort.h" 40 #include "vsort.h" 41 42 static inline bool 43 isdigit_clocale(wchar_t c) 44 { 45 46 return (c >= L'0' && c <= L'9'); 47 } 48 49 static inline bool 50 isalpha_clocale(wchar_t c) 51 { 52 53 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 54 } 55 56 static inline bool 57 isalnum_clocale(wchar_t c) 58 { 59 60 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 61 (c >= L'0' && c <= L'9')); 62 } 63 64 /* 65 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 66 * Set length of string before suffix. 67 */ 68 static void 69 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 70 { 71 wchar_t c; 72 size_t clen; 73 bool expect_alpha, sfx; 74 75 sfx = false; 76 expect_alpha = false; 77 *len = 0; 78 clen = 0; 79 80 while ((si < se) && (c = bws_get_iter_value(si))) { 81 if (expect_alpha) { 82 expect_alpha = false; 83 if (!isalpha_clocale(c) && (c != L'~')) 84 sfx = false; 85 } else if (c == L'.') { 86 expect_alpha = true; 87 if (!sfx) { 88 sfx = true; 89 *len = clen; 90 } 91 } else if (!isalnum_clocale(c) && (c != L'~')) 92 sfx = false; 93 94 si = bws_iterator_inc(si, 1); 95 ++clen; 96 } 97 98 /* This code must be here to make the implementation compatible 99 * with WORDING of GNU sort documentation. 100 * But the GNU sort implementation is not following its own 101 * documentation. GNU sort allows empty file extensions 102 * (just dot with nothing after); but the regular expression in 103 * their documentation does not allow empty file extensions. 104 * We chose to make our implementation compatible with GNU sort 105 * implementation. If they will ever fix their bug, this code 106 * must be uncommented. Or they may choose to fix the info page, 107 * then the code stays commented. 108 * 109 if (expect_alpha) 110 sfx = false; 111 */ 112 113 if (!sfx) 114 *len = clen; 115 } 116 117 static inline int 118 cmp_chars(wchar_t c1, wchar_t c2) 119 { 120 121 if (c1 == c2) 122 return (0); 123 124 if (c1 == L'~') 125 return (-1); 126 if (c2 == L'~') 127 return (+1); 128 129 if (isdigit_clocale(c1) || !c1) 130 return ((isdigit_clocale(c2) || !c2) ? 0 : -1); 131 132 if (isdigit_clocale(c2) || !c2) 133 return (+1); 134 135 if (isalpha_clocale(c1)) 136 return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); 137 138 if (isalpha_clocale(c2)) 139 return (+1); 140 141 return ((int) c1 - (int) c2); 142 } 143 144 static int 145 cmpversions(bwstring_iterator si1, bwstring_iterator se1, 146 bwstring_iterator si2, bwstring_iterator se2) 147 { 148 int cmp, diff; 149 150 while ((si1 < se1) || (si2 < se2)) { 151 diff = 0; 152 153 while (((si1 < se1) && 154 !isdigit_clocale(bws_get_iter_value(si1))) || 155 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 156 wchar_t c1, c2; 157 158 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 159 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 160 161 cmp = cmp_chars(c1, c2); 162 if (cmp) 163 return (cmp); 164 165 if (si1 < se1) 166 si1 = bws_iterator_inc(si1, 1); 167 if (si2 < se2) 168 si2 = bws_iterator_inc(si2, 1); 169 } 170 171 while (bws_get_iter_value(si1) == L'0') 172 si1 = bws_iterator_inc(si1, 1); 173 174 while (bws_get_iter_value(si2) == L'0') 175 si2 = bws_iterator_inc(si2, 1); 176 177 while (isdigit_clocale(bws_get_iter_value(si1)) && 178 isdigit_clocale(bws_get_iter_value(si2))) { 179 if (!diff) 180 diff = ((int)bws_get_iter_value(si1) - 181 (int)bws_get_iter_value(si2)); 182 si1 = bws_iterator_inc(si1, 1); 183 si2 = bws_iterator_inc(si2, 1); 184 } 185 186 if (isdigit_clocale(bws_get_iter_value(si1))) 187 return (1); 188 189 if (isdigit_clocale(bws_get_iter_value(si2))) 190 return (-1); 191 192 if (diff) 193 return (diff); 194 } 195 196 return (0); 197 } 198 199 /* 200 * Compare two version strings 201 */ 202 int 203 vcmp(struct bwstring *s1, struct bwstring *s2) 204 { 205 bwstring_iterator si1, si2; 206 wchar_t c1, c2; 207 size_t len1, len2, slen1, slen2; 208 int cmp_bytes, cmp_res; 209 210 if (s1 == s2) 211 return (0); 212 213 cmp_bytes = bwscmp(s1, s2, 0); 214 if (cmp_bytes == 0) 215 return (0); 216 217 len1 = slen1 = BWSLEN(s1); 218 len2 = slen2 = BWSLEN(s2); 219 220 if (slen1 < 1) 221 return (-1); 222 if (slen2 < 1) 223 return (+1); 224 225 si1 = bws_begin(s1); 226 si2 = bws_begin(s2); 227 228 c1 = bws_get_iter_value(si1); 229 c2 = bws_get_iter_value(si2); 230 231 if (c1 == L'.' && (slen1 == 1)) 232 return (-1); 233 234 if (c2 == L'.' && (slen2 == 1)) 235 return (+1); 236 237 if (slen1 == 2 && c1 == L'.' && 238 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 239 return (-1); 240 if (slen2 == 2 && c2 == L'.' && 241 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 242 return (+1); 243 244 if (c1 == L'.' && c2 != L'.') 245 return (-1); 246 if (c1 != L'.' && c2 == L'.') 247 return (+1); 248 249 if (c1 == L'.' && c2 == L'.') { 250 si1 = bws_iterator_inc(si1, 1); 251 si2 = bws_iterator_inc(si2, 1); 252 } 253 254 find_suffix(si1, bws_end(s1), &len1); 255 find_suffix(si2, bws_end(s2), &len2); 256 257 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 258 return (cmp_bytes); 259 260 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 261 bws_iterator_inc(si2, len2)); 262 263 if (cmp_res == 0) 264 cmp_res = cmp_bytes; 265 266 return (cmp_res); 267 } 268