1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/types.h>
31
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "sort.h"
37 #include "vsort.h"
38
39 static inline bool
isdigit_clocale(wchar_t c)40 isdigit_clocale(wchar_t c)
41 {
42
43 return (c >= L'0' && c <= L'9');
44 }
45
46 static inline bool
isalpha_clocale(wchar_t c)47 isalpha_clocale(wchar_t c)
48 {
49
50 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
51 }
52
53 static inline bool
isalnum_clocale(wchar_t c)54 isalnum_clocale(wchar_t c)
55 {
56
57 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
58 (c >= L'0' && c <= L'9'));
59 }
60
61 /*
62 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
63 * Set length of string before suffix.
64 */
65 static void
find_suffix(bwstring_iterator si,bwstring_iterator se,size_t * len)66 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
67 {
68 wchar_t c;
69 size_t clen;
70 bool expect_alpha, sfx;
71
72 sfx = false;
73 expect_alpha = false;
74 *len = 0;
75 clen = 0;
76
77 while ((si < se) && (c = bws_get_iter_value(si))) {
78 if (expect_alpha) {
79 expect_alpha = false;
80 if (!isalpha_clocale(c) && (c != L'~'))
81 sfx = false;
82 } else if (c == L'.') {
83 expect_alpha = true;
84 if (!sfx) {
85 sfx = true;
86 *len = clen;
87 }
88 } else if (!isalnum_clocale(c) && (c != L'~'))
89 sfx = false;
90
91 si = bws_iterator_inc(si, 1);
92 ++clen;
93 }
94
95 /* This code must be here to make the implementation compatible
96 * with WORDING of GNU sort documentation.
97 * But the GNU sort implementation is not following its own
98 * documentation. GNU sort allows empty file extensions
99 * (just dot with nothing after); but the regular expression in
100 * their documentation does not allow empty file extensions.
101 * We chose to make our implementation compatible with GNU sort
102 * implementation. If they will ever fix their bug, this code
103 * must be uncommented. Or they may choose to fix the info page,
104 * then the code stays commented.
105 *
106 if (expect_alpha)
107 sfx = false;
108 */
109
110 if (!sfx)
111 *len = clen;
112 }
113
114 static inline int
cmp_chars(wchar_t c1,wchar_t c2)115 cmp_chars(wchar_t c1, wchar_t c2)
116 {
117
118 if (c1 == c2)
119 return (0);
120
121 if (c1 == L'~')
122 return (-1);
123 if (c2 == L'~')
124 return (+1);
125
126 if (isdigit_clocale(c1) || !c1)
127 return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
128
129 if (isdigit_clocale(c2) || !c2)
130 return (+1);
131
132 if (isalpha_clocale(c1))
133 return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
134
135 if (isalpha_clocale(c2))
136 return (+1);
137
138 return ((int) c1 - (int) c2);
139 }
140
141 static int
cmpversions(bwstring_iterator si1,bwstring_iterator se1,bwstring_iterator si2,bwstring_iterator se2)142 cmpversions(bwstring_iterator si1, bwstring_iterator se1,
143 bwstring_iterator si2, bwstring_iterator se2)
144 {
145 int cmp, diff;
146
147 while ((si1 < se1) || (si2 < se2)) {
148 diff = 0;
149
150 while (((si1 < se1) &&
151 !isdigit_clocale(bws_get_iter_value(si1))) ||
152 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
153 wchar_t c1, c2;
154
155 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
156 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
157
158 cmp = cmp_chars(c1, c2);
159 if (cmp)
160 return (cmp);
161
162 if (si1 < se1)
163 si1 = bws_iterator_inc(si1, 1);
164 if (si2 < se2)
165 si2 = bws_iterator_inc(si2, 1);
166 }
167
168 while (bws_get_iter_value(si1) == L'0')
169 si1 = bws_iterator_inc(si1, 1);
170
171 while (bws_get_iter_value(si2) == L'0')
172 si2 = bws_iterator_inc(si2, 1);
173
174 while (isdigit_clocale(bws_get_iter_value(si1)) &&
175 isdigit_clocale(bws_get_iter_value(si2))) {
176 if (!diff)
177 diff = ((int)bws_get_iter_value(si1) -
178 (int)bws_get_iter_value(si2));
179 si1 = bws_iterator_inc(si1, 1);
180 si2 = bws_iterator_inc(si2, 1);
181 }
182
183 if (isdigit_clocale(bws_get_iter_value(si1)))
184 return (1);
185
186 if (isdigit_clocale(bws_get_iter_value(si2)))
187 return (-1);
188
189 if (diff)
190 return (diff);
191 }
192
193 return (0);
194 }
195
196 /*
197 * Compare two version strings
198 */
199 int
vcmp(struct bwstring * s1,struct bwstring * s2)200 vcmp(struct bwstring *s1, struct bwstring *s2)
201 {
202 bwstring_iterator si1, si2;
203 wchar_t c1, c2;
204 size_t len1, len2, slen1, slen2;
205 int cmp_bytes, cmp_res;
206
207 if (s1 == s2)
208 return (0);
209
210 cmp_bytes = bwscmp(s1, s2, 0);
211 if (cmp_bytes == 0)
212 return (0);
213
214 len1 = slen1 = BWSLEN(s1);
215 len2 = slen2 = BWSLEN(s2);
216
217 if (slen1 < 1)
218 return (-1);
219 if (slen2 < 1)
220 return (+1);
221
222 si1 = bws_begin(s1);
223 si2 = bws_begin(s2);
224
225 c1 = bws_get_iter_value(si1);
226 c2 = bws_get_iter_value(si2);
227
228 if (c1 == L'.' && (slen1 == 1))
229 return (-1);
230
231 if (c2 == L'.' && (slen2 == 1))
232 return (+1);
233
234 if (slen1 == 2 && c1 == L'.' &&
235 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
236 return (-1);
237 if (slen2 == 2 && c2 == L'.' &&
238 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
239 return (+1);
240
241 if (c1 == L'.' && c2 != L'.')
242 return (-1);
243 if (c1 != L'.' && c2 == L'.')
244 return (+1);
245
246 if (c1 == L'.' && c2 == L'.') {
247 si1 = bws_iterator_inc(si1, 1);
248 si2 = bws_iterator_inc(si2, 1);
249 }
250
251 find_suffix(si1, bws_end(s1), &len1);
252 find_suffix(si2, bws_end(s2), &len2);
253
254 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
255 return (cmp_bytes);
256
257 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
258 bws_iterator_inc(si2, len2));
259
260 if (cmp_res == 0)
261 cmp_res = cmp_bytes;
262
263 return (cmp_res);
264 }
265