1 /*- 2 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 3 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 33 #include <ctype.h> 34 #include <stdlib.h> 35 #include <string.h> 36 37 #include "sort.h" 38 #include "vsort.h" 39 40 static inline bool 41 isdigit_clocale(wchar_t c) 42 { 43 44 return (c >= L'0' && c <= L'9'); 45 } 46 47 static inline bool 48 isalpha_clocale(wchar_t c) 49 { 50 51 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 52 } 53 54 static inline bool 55 isalnum_clocale(wchar_t c) 56 { 57 58 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 59 (c >= L'0' && c <= L'9')); 60 } 61 62 /* 63 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 64 * Set length of string before suffix. 65 */ 66 static void 67 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 68 { 69 wchar_t c; 70 size_t clen; 71 bool expect_alpha, sfx; 72 73 sfx = false; 74 expect_alpha = false; 75 *len = 0; 76 clen = 0; 77 78 while ((si < se) && (c = bws_get_iter_value(si))) { 79 if (expect_alpha) { 80 expect_alpha = false; 81 if (!isalpha_clocale(c) && (c != L'~')) 82 sfx = false; 83 } else if (c == L'.') { 84 expect_alpha = true; 85 if (!sfx) { 86 sfx = true; 87 *len = clen; 88 } 89 } else if (!isalnum_clocale(c) && (c != L'~')) 90 sfx = false; 91 92 si = bws_iterator_inc(si, 1); 93 ++clen; 94 } 95 96 /* This code must be here to make the implementation compatible 97 * with WORDING of GNU sort documentation. 98 * But the GNU sort implementation is not following its own 99 * documentation. GNU sort allows empty file extensions 100 * (just dot with nothing after); but the regular expression in 101 * their documentation does not allow empty file extensions. 102 * We chose to make our implementation compatible with GNU sort 103 * implementation. If they will ever fix their bug, this code 104 * must be uncommented. Or they may choose to fix the info page, 105 * then the code stays commented. 106 * 107 if (expect_alpha) 108 sfx = false; 109 */ 110 111 if (!sfx) 112 *len = clen; 113 } 114 115 static inline int 116 cmp_chars(wchar_t c1, wchar_t c2) 117 { 118 119 if (c1 == c2) 120 return (0); 121 122 if (c1 == L'~') 123 return (-1); 124 if (c2 == L'~') 125 return (+1); 126 127 if (isdigit_clocale(c1) || !c1) 128 return ((isdigit_clocale(c2) || !c2) ? 0 : -1); 129 130 if (isdigit_clocale(c2) || !c2) 131 return (+1); 132 133 if (isalpha_clocale(c1)) 134 return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); 135 136 if (isalpha_clocale(c2)) 137 return (+1); 138 139 return ((int) c1 - (int) c2); 140 } 141 142 static int 143 cmpversions(bwstring_iterator si1, bwstring_iterator se1, 144 bwstring_iterator si2, bwstring_iterator se2) 145 { 146 int cmp, diff; 147 148 while ((si1 < se1) || (si2 < se2)) { 149 diff = 0; 150 151 while (((si1 < se1) && 152 !isdigit_clocale(bws_get_iter_value(si1))) || 153 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 154 wchar_t c1, c2; 155 156 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 157 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 158 159 cmp = cmp_chars(c1, c2); 160 if (cmp) 161 return (cmp); 162 163 if (si1 < se1) 164 si1 = bws_iterator_inc(si1, 1); 165 if (si2 < se2) 166 si2 = bws_iterator_inc(si2, 1); 167 } 168 169 while (bws_get_iter_value(si1) == L'0') 170 si1 = bws_iterator_inc(si1, 1); 171 172 while (bws_get_iter_value(si2) == L'0') 173 si2 = bws_iterator_inc(si2, 1); 174 175 while (isdigit_clocale(bws_get_iter_value(si1)) && 176 isdigit_clocale(bws_get_iter_value(si2))) { 177 if (!diff) 178 diff = ((int)bws_get_iter_value(si1) - 179 (int)bws_get_iter_value(si2)); 180 si1 = bws_iterator_inc(si1, 1); 181 si2 = bws_iterator_inc(si2, 1); 182 } 183 184 if (isdigit_clocale(bws_get_iter_value(si1))) 185 return (1); 186 187 if (isdigit_clocale(bws_get_iter_value(si2))) 188 return (-1); 189 190 if (diff) 191 return (diff); 192 } 193 194 return (0); 195 } 196 197 /* 198 * Compare two version strings 199 */ 200 int 201 vcmp(struct bwstring *s1, struct bwstring *s2) 202 { 203 bwstring_iterator si1, si2; 204 wchar_t c1, c2; 205 size_t len1, len2, slen1, slen2; 206 int cmp_bytes, cmp_res; 207 208 if (s1 == s2) 209 return (0); 210 211 cmp_bytes = bwscmp(s1, s2, 0); 212 if (cmp_bytes == 0) 213 return (0); 214 215 len1 = slen1 = BWSLEN(s1); 216 len2 = slen2 = BWSLEN(s2); 217 218 if (slen1 < 1) 219 return (-1); 220 if (slen2 < 1) 221 return (+1); 222 223 si1 = bws_begin(s1); 224 si2 = bws_begin(s2); 225 226 c1 = bws_get_iter_value(si1); 227 c2 = bws_get_iter_value(si2); 228 229 if (c1 == L'.' && (slen1 == 1)) 230 return (-1); 231 232 if (c2 == L'.' && (slen2 == 1)) 233 return (+1); 234 235 if (slen1 == 2 && c1 == L'.' && 236 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 237 return (-1); 238 if (slen2 == 2 && c2 == L'.' && 239 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 240 return (+1); 241 242 if (c1 == L'.' && c2 != L'.') 243 return (-1); 244 if (c1 != L'.' && c2 == L'.') 245 return (+1); 246 247 if (c1 == L'.' && c2 == L'.') { 248 si1 = bws_iterator_inc(si1, 1); 249 si2 = bws_iterator_inc(si2, 1); 250 } 251 252 find_suffix(si1, bws_end(s1), &len1); 253 find_suffix(si2, bws_end(s2), &len2); 254 255 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 256 return (cmp_bytes); 257 258 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 259 bws_iterator_inc(si2, len2)); 260 261 if (cmp_res == 0) 262 cmp_res = cmp_bytes; 263 264 return (cmp_res); 265 } 266