1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include "fields.h"
27
28 /*
29 * fields
30 *
31 * Overview
32 * By a field, we mean the various delimited character sequences within each
33 * line of the input files. The sort key consists of an ordered sequence of
34 * fields, which need not include all possible fields for the given line.
35 * (Furthermore, not every line need contain sufficient fields for the fields
36 * given within the sort key. In fact, none of the lines in the input stream
37 * need contain sufficient fields.)
38 *
39 * There are two methods for specifying fields for sort(1); these are
40 * discussed in options.c. Here we discuss only the internal representation
41 * of fields, as used for constructing the collation vector for each line as
42 * defined by the sort key.
43 *
44 * Representation
45 * The sort key is a singly-linked list of field specifiers. At present,
46 * fields may belong to one of three species: alphabetical, numerical, or
47 * monthly; the species (f_species) then indicates the conversion function
48 * (f_convert) used to transform the raw characters of the character sequence
49 * to a collatable form. (In principle, this allows us to consider future
50 * field species such as hexadecimal.)
51 *
52 * Fields and offsets are numbered such that zero refers to the first field or
53 * character, respectively. Thus, the interpretation of a key specifier, m.n,
54 * is that the field begins at the nth character beyond the mth occurrence of
55 * the key separator. If the blanks flag has been specified, then the field
56 * begins at the nth non-blank character past the mth key separator. If the
57 * key separator is unspecified, then the key separator is defined as one or
58 * more blank characters.
59 *
60 * In general, the various options afforded by sort may be broken into two
61 * categories: field species and field modifiers. For each field species,
62 * there is one or more conversion routines that take a delimited character
63 * sequence and convert it to a character sequence collatable by strcmp() or
64 * memcmp(). For field species that may be further modified, such as the
65 * fold-to-uppercase option for alphabetic fields, the conversion routine may
66 * be aware of how the modifier affects collation. Finally, the no-modifiers
67 * case may present an opportunity for a simplified, faster version.
68 *
69 * Code Structure
70 * The code paths for single-byte and multi-byte locales diverge significantly
71 * in fields.c. Most routines have an *_wide() version, which produces an
72 * equivalent effect for line records whose data field is composed of wide
73 * characters (wchar_t). However, the l_collated field of a line record is
74 * always composed of characters, so that the radix sorts provided in
75 * internal.c can work in both single- and multi-byte locales. Thus, in the
76 * various convert_*_wide() routines, the output is placed in l_collated, with
77 * a length multiplier of 4.
78 */
79
/*
 * Scanner state bits used by the numeric converters: IN_NUMBER is set once a
 * significant (non-zero) digit has been consumed.
 */
#define	BEFORE_NUMBER	0x0
#define	IN_NUMBER	0x1

/*
 * Locale-dependent punctuation, captured once by
 * field_initialize_separator().  A '\0' narrow value marks a separator or
 * monetary decimal point that the locale leaves undefined.
 */
static char	numerical_separator;
static char	numerical_decimal;
static char	monetary_separator;
static char	monetary_decimal;

static wchar_t	w_numerical_separator;
static wchar_t	w_numerical_decimal;
static wchar_t	w_monetary_separator;
static wchar_t	w_monetary_decimal;

#define	MONTHS_IN_YEAR	12
#define	MAX_MON_LEN	20

/*
 * Leading collation byte emitted by the month converters: MO_NONE tags a key
 * that matched no month name, MO_OFFSET is added to the zero-based month
 * index of a key that did.
 */
enum {
	MO_NONE = 1,
	MO_OFFSET = 2
};

/*
 * Upper-cased abbreviated month names and their lengths, filled in by
 * field_initialize_month().
 */
static char	*months[MONTHS_IN_YEAR];
static size_t	month_lengths[MONTHS_IN_YEAR];
static wchar_t	*w_months[MONTHS_IN_YEAR];
static size_t	w_month_lengths[MONTHS_IN_YEAR];

/*
 * Character classification helpers.  Note that the wide separator and
 * decimal tests consult the narrow variables to decide whether the locale
 * defined the corresponding character at all.
 */
#define	DECIMAL_CHAR		(numerical_decimal)
#define	IS_BLANK(x)		(isspace((uchar_t)(x)) && (x) != '\n')
#define	IS_SEPARATOR(x)		\
	((numerical_separator != '\0' && (x) == numerical_separator) || \
	(monetary_separator != '\0' && (x) == monetary_separator))
#define	IS_DECIMAL(x)		\
	((x) == numerical_decimal || \
	(monetary_decimal != '\0' && (x) == monetary_decimal))
#define	W_DECIMAL_CHAR		(w_numerical_decimal)
#define	W_IS_BLANK(x)		(iswspace(x) && (x) != L'\n')
#define	W_IS_SEPARATOR(x)	\
	((numerical_separator != '\0' && (x) == w_numerical_separator) || \
	(monetary_separator != '\0' && (x) == w_monetary_separator))
#define	W_IS_DECIMAL(x)		\
	(((x) == w_numerical_decimal) || \
	(monetary_decimal != '\0' && (x) == w_monetary_decimal))

/*
 * Terminator written between converted fields in the collation vector.
 */
#define	INTERFIELD_SEPARATOR	'\0'
#define	W_INTERFIELD_SEPARATOR	L'\0'

#define	INT_SIGN_FLIP_MASK	0x80000000
#define	INT_SIGN_PASS_MASK	0x00000000
125
/*
 * strx_ops_t, xfrm_len, and C_ncpy: In the case where we are sorting in the
 * C locale, we want to avoid the expense of transforming strings to collatable
 * forms since, by definition, an arbitrary string in the C locale is already in
 * its collatable form.  Therefore, we construct a small ops vector (the
 * strx_ops) and two wrappers: xfrm_len() to massage the strxfrm(NULL, ...) into
 * strlen()-like behaviour, and C_ncpy() to make strncpy() appear
 * strxfrm()-like.
 */
/*
 * xfrm_len() asks strxfrm() how long the transformed form of s2 would be and
 * returns that length plus one.  The len argument is ignored; it exists so
 * that the signature matches the sx_len slot of strx_ops_t.
 */
/*ARGSUSED*/
static size_t
xfrm_len(const char *s2, size_t len)
{
	size_t xfrm_bytes = strxfrm(NULL, s2, 0);

	return (xfrm_bytes + 1);
}
141
/*
 * C_ncpy() is the C-locale stand-in for strxfrm(): it copies with strncpy()
 * and reports the number of characters excluding the null character.  The
 * count n is expected to include that null character, hence the result of
 * n - 1.  This is not a general-purpose routine: it is only reached through
 * the sx_xfrm slot of C_ops (used by field_convert_alpha() and
 * field_convert_alpha_simple()), and assumes n is the length of s2 plus one.
 */
static size_t
C_ncpy(char *s1, const char *s2, size_t n)
{
	const size_t copied = n - 1;

	(void) strncpy(s1, s2, n);

	return (copied);
}
154
/*
 * C_len() is the C-locale sx_len implementation: the caller already knows the
 * length of s, so it is handed straight back.  s itself is only examined by
 * the assertion.
 */
/*ARGSUSED*/
static size_t
C_len(const char *s, size_t len)
{
	ASSERT(s != NULL);

	return (len);
}
162
163 typedef struct _strx_ops {
164 size_t (*sx_len)(const char *, size_t);
165 size_t (*sx_xfrm)(char *, const char *, size_t);
166 } strx_ops_t;
167
168 static const strx_ops_t C_ops = { C_len, C_ncpy };
169 static const strx_ops_t SB_ops = { xfrm_len, strxfrm };
170
171 static const strx_ops_t *xfrm_ops;
172
173 static void
field_initialize_separator(void)174 field_initialize_separator(void)
175 {
176 /*
177 * A locale need not define all of the cases below: only decimal_point
178 * must be defined. Furthermore, sort(1) has traditionally not used the
179 * positive_sign and negative_sign, grouping, or currency_symbols (or
180 * their numeric counterparts, if any).
181 */
182 struct lconv *conv = localeconv();
183
184 if (!xstreql(conv->thousands_sep, "")) {
185 numerical_separator = *conv->thousands_sep;
186 (void) mbtowc(&w_numerical_separator, conv->thousands_sep,
187 MB_CUR_MAX);
188 } else
189 numerical_separator = '\0';
190
191 if (!xstreql(conv->mon_thousands_sep, "")) {
192 monetary_separator = *conv->mon_thousands_sep;
193 (void) mbtowc(&w_monetary_separator, conv->mon_thousands_sep,
194 MB_CUR_MAX);
195 } else
196 monetary_separator = '\0';
197
198 if (!xstreql(conv->mon_decimal_point, "")) {
199 monetary_decimal = *conv->mon_decimal_point;
200 (void) mbtowc(&w_monetary_decimal, conv->mon_decimal_point,
201 MB_CUR_MAX);
202 } else
203 monetary_decimal = '\0';
204
205 numerical_decimal = *conv->decimal_point;
206 (void) mbtowc(&w_numerical_decimal, conv->decimal_point, MB_CUR_MAX);
207 }
208
209 static void
field_initialize_month(int is_c_locale)210 field_initialize_month(int is_c_locale)
211 {
212 int i;
213 int j;
214 struct tm this_month;
215 const char *c_months[MONTHS_IN_YEAR] = {
216 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
217 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
218 };
219
220 char month_name[MAX_MON_LEN * MB_LEN_MAX];
221 wchar_t w_month_name[MAX_MON_LEN];
222
223 if (is_c_locale) {
224 for (i = 0; i < MONTHS_IN_YEAR; i++) {
225 months[i] = (char *)c_months[i];
226 month_lengths[i] = strlen(c_months[i]);
227 }
228 /*
229 * We don't need to initialize the wide version of the month
230 * names.
231 */
232 return;
233 }
234
235 (void) memset(&this_month, 0, sizeof (this_month));
236
237 for (i = 0; i < MONTHS_IN_YEAR; i++) {
238 this_month.tm_mon = i;
239
240 (void) strftime(month_name, sizeof (month_name),
241 "%b", &this_month);
242
243 for (j = 0; j < strlen(month_name); j++)
244 month_name[j] = toupper(month_name[j]);
245 (void) mbstowcs(w_month_name, month_name, MAX_MON_LEN);
246
247 months[i] = strdup(month_name);
248 month_lengths[i] = strlen(month_name);
249 w_months[i] = wsdup(w_month_name);
250 w_month_lengths[i] = wslen(w_month_name);
251 }
252 }
253
254 void
field_initialize(sort_t * S)255 field_initialize(sort_t *S)
256 {
257 field_initialize_month(S->m_c_locale);
258 field_initialize_separator();
259
260 if (S->m_c_locale)
261 xfrm_ops = &C_ops;
262 else
263 xfrm_ops = &SB_ops;
264 }
265
266 field_t *
field_new(sort_t * S)267 field_new(sort_t *S)
268 {
269 field_t *F = safe_realloc(NULL, sizeof (field_t));
270
271 F->f_start_field = -1;
272 F->f_start_offset = -1;
273 F->f_end_field = -1;
274 F->f_end_offset = -1;
275 F->f_next = NULL;
276
277 if (S == NULL) {
278 F->f_species = ALPHA;
279 F->f_options = 0;
280 } else {
281 F->f_species = S->m_default_species;
282 F->f_options = S->m_field_options;
283 }
284
285 return (F);
286 }
287
288 void
field_delete(field_t * F)289 field_delete(field_t *F)
290 {
291 free(F);
292 }
293
294 /*
295 * The recursive implementation of field_add_to_chain() given below is
296 * inappropriate if function calls are expensive, or a truly large number of
297 * fields are anticipated.
298 */
299 void
field_add_to_chain(field_t ** F,field_t * A)300 field_add_to_chain(field_t **F, field_t *A)
301 {
302 if (*F == NULL)
303 *F = A;
304 else
305 field_add_to_chain(&((*F)->f_next), A);
306 }
307
#ifdef DEBUG
#ifndef _LP64
#define	FIELD_FMT \
"\nStart field: %d\tStart offset: %d\nEnd field: %d\tEnd offset: %d\n"
#else /* !_LP64 */
#define	FIELD_FMT \
"\nStart field: %ld\tStart offset: %ld\nEnd field: %ld\tEnd offset: %ld\n"
#endif /* !_LP64 */

/*
 * field_print is used only for debugging purposes.  It writes the species,
 * the set of option flags (or NO_MODIFIERS when none is set), and the four
 * start/end positions of F to stderr.
 */
void
field_print(field_t *F)
{
	char *field_names[] = {"ALPHA", "MONTH", "NUMERIC"};
	int status = 0;

	(void) fprintf(stderr, "Type: %s", field_names[F->f_species]);
	(void) fprintf(stderr, "\tOptions: ");

	/*
	 * Every option name carries a trailing space so that consecutive
	 * names remain separated in the output.
	 */
	if (F->f_options & FIELD_REVERSE_COMPARISONS) {
		(void) fprintf(stderr, "REVERSE ");
		status++;
	}
	if (F->f_options & FIELD_DICTIONARY_ORDER) {
		(void) fprintf(stderr, "DICTIONARY ");
		status++;
	}
	if (F->f_options & FIELD_FOLD_UPPERCASE) {
		(void) fprintf(stderr, "UPPERCASE ");
		status++;
	}
	if (F->f_options & FIELD_IGNORE_NONPRINTABLES) {
		(void) fprintf(stderr, "PRINTABLES ");
		status++;
	}
	if (F->f_options & FIELD_IGNORE_BLANKS_START) {
		(void) fprintf(stderr, "BLANKS_START ");
		status++;
	}
	if (F->f_options & FIELD_IGNORE_BLANKS_END) {
		(void) fprintf(stderr, "BLANKS_END ");
		status++;
	}

	if (status == 0)
		(void) fprintf(stderr, "NO_MODIFIERS");

	(void) fprintf(stderr, FIELD_FMT, F->f_start_field, F->f_start_offset,
	    F->f_end_field, F->f_end_offset);
}
#endif /* DEBUG */
361
362 static ssize_t
field_boundary(field_t * F,line_rec_t * L,int is_end,int is_blanks)363 field_boundary(field_t *F, line_rec_t *L, int is_end, int is_blanks)
364 {
365 char *S = L->l_data.sp;
366 char *T = S;
367 char *eol = S + L->l_data_length;
368 ssize_t field = is_end ? F->f_end_field : F->f_start_field;
369 ssize_t offset = is_end ? F->f_end_offset : F->f_start_offset;
370 ssize_t ret;
371
372 ASSERT(is_end || field > -1);
373
374 if (is_end && field == -1)
375 return (L->l_data_length);
376
377 while (field-- > 0) {
378 while (T < eol && IS_BLANK(*T))
379 T++;
380
381 while (T < eol && !IS_BLANK(*T))
382 T++;
383 }
384
385 if ((!is_end || offset > 0) && is_blanks) {
386 while (IS_BLANK(*T))
387 T++;
388 }
389
390 if ((ret = MAX(T - S, 0) + offset) >= L->l_data_length)
391 return (L->l_data_length);
392
393 return (ret);
394 }
395
396 static void
field_delimit(field_t * F,line_rec_t * L,ssize_t * start,ssize_t * end)397 field_delimit(field_t *F, line_rec_t *L, ssize_t *start, ssize_t *end)
398 {
399 ASSERT(F->f_start_field > -1);
400
401 *start = field_boundary(F, L, 0,
402 F->f_options & FIELD_IGNORE_BLANKS_START);
403 *end = field_boundary(F, L, 1,
404 F->f_options & FIELD_IGNORE_BLANKS_END);
405 }
406
407 static ssize_t
field_boundary_wide(field_t * F,line_rec_t * L,int is_end,int is_blanks)408 field_boundary_wide(field_t *F, line_rec_t *L, int is_end, int is_blanks)
409 {
410 wchar_t *S = L->l_data.wp;
411 wchar_t *T = S;
412 wchar_t *eol = S + L->l_data_length;
413 ssize_t field = is_end ? F->f_end_field : F->f_start_field;
414 ssize_t offset = is_end ? F->f_end_offset : F->f_start_offset;
415 ssize_t ret;
416
417 ASSERT(is_end || field > -1);
418
419 if (is_end && field == -1)
420 return (L->l_data_length);
421
422 while (field-- > 0) {
423 while (T < eol && W_IS_BLANK(*T))
424 T++;
425
426 while (T < eol && !W_IS_BLANK(*T))
427 T++;
428 }
429
430 if ((!is_end || offset > 0) && is_blanks) {
431 while (W_IS_BLANK(*T))
432 T++;
433 }
434
435 if ((ret = MAX(T - S, 0) + offset) >= L->l_data_length)
436 return (L->l_data_length);
437
438 return (ret);
439 }
440
441 static void
field_delimit_wide(field_t * F,line_rec_t * L,ssize_t * start,ssize_t * end)442 field_delimit_wide(field_t *F, line_rec_t *L, ssize_t *start, ssize_t *end)
443 {
444 ASSERT(F->f_start_field > -1);
445
446 *start = field_boundary_wide(F, L, 0,
447 F->f_options & FIELD_IGNORE_BLANKS_START);
448 *end = field_boundary_wide(F, L, 1,
449 F->f_options & FIELD_IGNORE_BLANKS_END);
450 }
451
452 static ssize_t
field_boundary_tabbed(field_t * F,line_rec_t * L,int is_end,int is_blanks,vchar_t delimiter)453 field_boundary_tabbed(field_t *F, line_rec_t *L, int is_end, int is_blanks,
454 vchar_t delimiter)
455 {
456 char *S = L->l_data.sp;
457 char *T = S;
458 char *eol = S + L->l_data_length;
459 ssize_t field = is_end ? F->f_end_field : F->f_start_field;
460 ssize_t offset = is_end ? F->f_end_offset : F->f_start_offset;
461 ssize_t ret;
462
463 ASSERT(is_end || field > -1);
464
465 if (is_end && field == -1)
466 return (L->l_data_length);
467
468 while (field-- > 0) {
469 T = xstrnchr(T, delimiter.sc, eol - T);
470 if (T == NULL || T > eol)
471 return (L->l_data_length);
472
473 T++;
474 }
475
476 if ((!is_end || offset != 0) && is_blanks) {
477 while (IS_BLANK(*T))
478 T++;
479 }
480
481 if ((ret = MAX(T - S, 0) + offset) >= L->l_data_length) {
482 if (L->l_data_length <= 0)
483 return (0);
484 if (S[L->l_data_length - 1] == delimiter.sc) {
485 return (L->l_data_length - 1);
486 } else {
487 return (L->l_data_length);
488 }
489 }
490
491 if (is_end && offset == 0)
492 ret--;
493
494 return (ret);
495 }
496
497 /*
498 * field_delimit_tabbed() is called when a field separator has been defined
499 * using the -t option. The character at the offset, start, is either one or
500 * more character positions past the delimiter marking the start of the
501 * field, or at the end of the line.
502 */
503 static void
field_delimit_tabbed(field_t * F,line_rec_t * L,ssize_t * start,ssize_t * end,vchar_t delimiter)504 field_delimit_tabbed(field_t *F, line_rec_t *L, ssize_t *start, ssize_t *end,
505 vchar_t delimiter)
506 {
507 ASSERT(F->f_start_field > -1);
508
509 *start = field_boundary_tabbed(F, L, 0, F->f_options &
510 FIELD_IGNORE_BLANKS_START, delimiter);
511 *end = field_boundary_tabbed(F, L, 1, F->f_options &
512 FIELD_IGNORE_BLANKS_END, delimiter);
513 }
514
515 static ssize_t
field_boundary_tabbed_wide(field_t * F,line_rec_t * L,int is_end,int is_blanks,vchar_t delimiter)516 field_boundary_tabbed_wide(field_t *F, line_rec_t *L, int is_end, int is_blanks,
517 vchar_t delimiter)
518 {
519 wchar_t *S = L->l_data.wp;
520 wchar_t *T = S;
521 wchar_t *eol = S + L->l_data_length;
522 ssize_t field = is_end ? F->f_end_field : F->f_start_field;
523 ssize_t offset = is_end ? F->f_end_offset : F->f_start_offset;
524 ssize_t ret;
525
526 ASSERT(is_end || field > -1);
527
528 if (is_end && field == -1)
529 return (L->l_data_length);
530
531 while (field-- > 0) {
532 T = xwsnchr(T, delimiter.wc, eol - T);
533 if (T == NULL || T > eol)
534 return (L->l_data_length);
535
536 T++;
537 }
538
539 if ((!is_end || offset != 0) && is_blanks) {
540 while (W_IS_BLANK(*T))
541 T++;
542 }
543
544 if ((ret = MAX(T - S, 0) + offset) >= L->l_data_length) {
545 if (L->l_data_length <= 0)
546 return (0);
547 if (S[L->l_data_length - 1] == delimiter.wc) {
548 return (L->l_data_length - 1);
549 } else {
550 return (L->l_data_length);
551 }
552 }
553
554 if (is_end && offset == 0)
555 ret--;
556
557 return (ret);
558 }
559
560 static void
field_delimit_tabbed_wide(field_t * F,line_rec_t * L,ssize_t * start,ssize_t * end,vchar_t delimiter)561 field_delimit_tabbed_wide(field_t *F, line_rec_t *L, ssize_t *start,
562 ssize_t *end, vchar_t delimiter)
563 {
564 ASSERT(F->f_start_field > -1);
565
566 *start = field_boundary_tabbed_wide(F, L, 0, F->f_options &
567 FIELD_IGNORE_BLANKS_START, delimiter);
568 *end = field_boundary_tabbed_wide(F, L, 1, F->f_options &
569 FIELD_IGNORE_BLANKS_END, delimiter);
570 }
571
572 /*ARGSUSED*/
573 ssize_t
field_convert_month(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)574 field_convert_month(field_t *F, line_rec_t *L, vchar_t delimiter,
575 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
576 {
577 int j;
578 ssize_t val;
579 char month_candidate[MAX_MON_LEN * MB_LEN_MAX];
580 ssize_t month_length = data_length;
581 ssize_t month_offset = data_offset;
582
583 if (sizeof (char) > L->l_collate_bufsize - coll_offset)
584 return (-1);
585
586 (void) memset(month_candidate, 0, MAX_MON_LEN * MB_LEN_MAX);
587
588
589 /*
590 * The month field formally begins with the first non-blank character.
591 */
592 while (IS_BLANK(*(L->l_data.sp + month_offset))) {
593 month_offset++;
594 month_length--;
595 }
596
597 for (j = 0; j < MAX_MON_LEN && j < month_length; j++)
598 month_candidate[j] = toupper((L->l_data.sp + month_offset)[j]);
599
600 for (j = 0; j < MONTHS_IN_YEAR; j++) {
601 if (xstrneql(month_candidate, months[j], month_lengths[j])) {
602 *(L->l_collate.sp + coll_offset) = '\0' + j + MO_OFFSET;
603 return (1);
604 }
605 }
606
607 /*
608 * no matching month; copy string into field. required behaviour is
609 * that "month-free" keys sort before month-sortable keys, so insert
610 * a "will sort first" token.
611 */
612 *(L->l_collate.sp + coll_offset) = '\0' + MO_NONE;
613
614 val = field_convert_alpha_simple(F, L, delimiter, data_offset,
615 data_length, coll_offset + 1);
616
617 if (val < 0)
618 return (-1);
619 else
620 return (val + 1);
621 }
622
623 /*ARGSUSED*/
624 ssize_t
field_convert_month_wide(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)625 field_convert_month_wide(field_t *F, line_rec_t *L, vchar_t delimiter,
626 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
627 {
628 ssize_t j;
629 ssize_t val;
630 wchar_t month_candidate[MAX_MON_LEN];
631 wchar_t *month;
632 wchar_t *buffer = L->l_collate.wp + coll_offset;
633 ssize_t month_length = data_length;
634 ssize_t month_offset = data_offset;
635
636 if (L->l_collate_bufsize - coll_offset * sizeof (wchar_t) <
637 sizeof (wchar_t))
638 return (-1);
639
640 (void) memset(month_candidate, 0, MAX_MON_LEN * sizeof (wchar_t));
641
642
643 while (W_IS_BLANK(*(L->l_data.wp + month_offset))) {
644 month_offset++;
645 month_length--;
646 }
647
648 month = L->l_data.wp + month_offset;
649
650 for (j = 0; j < MAX_MON_LEN && j < month_length; j++)
651 month_candidate[j] = towupper(month[j]);
652
653 for (j = 0; j < MONTHS_IN_YEAR; j++)
654 if (xwcsneql(month_candidate, w_months[j],
655 w_month_lengths[j])) {
656 *buffer = L'\0' + j + MO_OFFSET;
657 return (1);
658 }
659
660 *buffer = L'\0' + MO_NONE;
661
662 val = field_convert_alpha_wide(F, L, delimiter, data_offset,
663 data_length, coll_offset + sizeof (wchar_t));
664
665 if (val < 0)
666 return (-1);
667 else
668 return (val + 1);
669 }
670
671 /*
672 * field_convert_alpha() always fails with return value -1 if the converted
673 * string would cause l_collate_length to exceed l_collate_bufsize
674 */
675 /*ARGSUSED*/
676 ssize_t
field_convert_alpha(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)677 field_convert_alpha(field_t *F, line_rec_t *L, vchar_t delimiter,
678 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
679 {
680 static char *compose;
681 static ssize_t compose_length;
682
683 ssize_t clength = 0;
684 ssize_t dlength;
685 ssize_t i;
686
687 if (compose_length < (data_length + 1)) {
688 compose_length = data_length + 1;
689 compose = safe_realloc(compose, compose_length * sizeof (char));
690 }
691
692 for (i = data_offset; i < data_offset + data_length; i++) {
693 char t = (L->l_data.sp)[i];
694
695 if ((F->f_options & FIELD_IGNORE_NONPRINTABLES) &&
696 !isprint((uchar_t)t))
697 continue;
698
699 if ((F->f_options & FIELD_DICTIONARY_ORDER) &&
700 !isalnum((uchar_t)t) && !isspace((uchar_t)t))
701 continue;
702
703 if (F->f_options & FIELD_FOLD_UPPERCASE)
704 t = toupper(t);
705
706 compose[clength++] = t;
707 }
708 compose[clength] = '\0';
709
710 if ((dlength = xfrm_ops->sx_len(compose, clength)) <
711 L->l_collate_bufsize - coll_offset)
712 return (xfrm_ops->sx_xfrm(L->l_collate.sp + coll_offset,
713 compose, dlength + 1));
714 else
715 return ((ssize_t)-1);
716 }
717
718 /*ARGSUSED*/
719 ssize_t
field_convert_alpha_simple(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)720 field_convert_alpha_simple(field_t *F, line_rec_t *L, vchar_t delimiter,
721 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
722 {
723 static char *compose;
724 static ssize_t compose_length;
725
726 ssize_t clength;
727 ssize_t dlength;
728
729 if (compose_length < (data_length + 1)) {
730 compose_length = data_length + 1;
731 compose = safe_realloc(compose, compose_length * sizeof (char));
732 }
733
734 (void) memcpy(compose, L->l_data.sp + data_offset, data_length);
735 clength = data_length;
736 compose[clength] = '\0';
737
738 if ((dlength = xfrm_ops->sx_len(compose, clength)) <
739 L->l_collate_bufsize - coll_offset)
740 return (xfrm_ops->sx_xfrm(L->l_collate.sp + coll_offset,
741 compose, dlength + 1));
742 else
743 return ((ssize_t)-1);
744 }
745
746 /*ARGSUSED*/
747 ssize_t
field_convert_alpha_wide(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)748 field_convert_alpha_wide(field_t *F, line_rec_t *L, vchar_t delimiter,
749 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
750 {
751 wchar_t *compose = safe_realloc(NULL, (data_length + 1) *
752 sizeof (wchar_t));
753 ssize_t clength = 0;
754 ssize_t dlength;
755 ssize_t i;
756 ssize_t ret;
757
758 for (i = data_offset; i < data_offset + data_length; i++) {
759 wchar_t t = (L->l_data.wp)[i];
760
761 if ((F->f_options & FIELD_IGNORE_NONPRINTABLES) && !iswprint(t))
762 continue;
763
764 if ((F->f_options & FIELD_DICTIONARY_ORDER) && !iswalnum(t) &&
765 !iswspace(t))
766 continue;
767
768 if (F->f_options & FIELD_FOLD_UPPERCASE)
769 t = towupper(t);
770
771 compose[clength++] = t;
772 }
773 compose[clength] = L'\0';
774
775 dlength = wcsxfrm(NULL, compose, (size_t)0);
776 if ((dlength * sizeof (wchar_t)) < L->l_collate_bufsize -
777 coll_offset * sizeof (wchar_t)) {
778 ret = (ssize_t)wcsxfrm(L->l_collate.wp + coll_offset, compose,
779 (size_t)dlength + 1);
780 } else {
781 ret = (ssize_t)-1;
782 }
783
784 safe_free(compose);
785
786 return (ret);
787 }
788
789 /*
790 * field_convert_numeric() converts the given field into a collatable numerical
791 * sequence. The sequence is ordered as { log, integer, separator, fraction },
792 * with an optional sentinel component at the sequence end.
793 */
794 /*ARGSUSED*/
795 ssize_t
field_convert_numeric(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)796 field_convert_numeric(field_t *F, line_rec_t *L, vchar_t delimiter,
797 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
798 {
799 char *number;
800 char *buffer = L->l_collate.sp + coll_offset;
801 ssize_t length;
802
803 char sign = '2';
804 int log_ten;
805 char *digits = buffer + 1 + sizeof (int) / sizeof (char);
806 size_t j = 0;
807 size_t i;
808
809 int state = BEFORE_NUMBER;
810
811 number = L->l_data.sp + data_offset;
812 length = data_length;
813
814 /*
815 * Eat leading blanks, if any.
816 */
817 for (i = 0; i < length; i++)
818 if (!IS_BLANK(number[i]))
819 break;
820
821 /*
822 * Test that there is sufficient size in the collation buffer for our
823 * number. In addition to the possible remaining characters in the
824 * field, we also require space for the sign (char), logarithm (int),
825 * separator (char), and as many as two string terminators (for reverse
826 * sorts).
827 */
828 if (((length - i) + 4 * sizeof (char) + sizeof (int)) >
829 (L->l_collate_bufsize - coll_offset))
830 return ((ssize_t)-1);
831
832 /*
833 * If negative, set sign.
834 */
835 if (number[i] == '-') {
836 i++;
837 sign = '0';
838 }
839
840 /*
841 * Scan integer part; eat leading zeros.
842 */
843 for (; i < length; i++) {
844 if (IS_SEPARATOR(number[i]))
845 continue;
846
847 if (number[i] == '0' && !(state & IN_NUMBER))
848 continue;
849
850 if (!isdigit((uchar_t)number[i]))
851 break;
852
853 state |= IN_NUMBER;
854 if (sign == '0')
855 digits[j++] = '0' + '9' - number[i];
856 else
857 digits[j++] = number[i];
858 }
859
860 if (i < length && IS_DECIMAL(number[i])) {
861 /*
862 * Integer part terminated by decimal.
863 */
864 digits[j] = DECIMAL_CHAR;
865 log_ten = j++;
866
867 /*
868 * Scan fractional part.
869 */
870 for (++i; i < length; i++) {
871 if (IS_SEPARATOR(number[i]))
872 continue;
873
874 if (!isdigit((uchar_t)number[i]))
875 break;
876
877 if (number[i] != '0')
878 state |= IN_NUMBER;
879
880 if (sign == '0')
881 digits[j++] = '0' + '9' - number[i];
882 else
883 digits[j++] = number[i];
884 }
885
886 if (sign == '0')
887 digits[j++] = (char)(UCHAR_MAX - INTERFIELD_SEPARATOR);
888 } else {
889 /*
890 * Nondigit or end of string seen.
891 */
892 log_ten = (int)j;
893 if (sign == '0')
894 digits[j++] = (char)(UCHAR_MAX - INTERFIELD_SEPARATOR);
895 else
896 digits[j] = INTERFIELD_SEPARATOR;
897 }
898
899 if ((state & IN_NUMBER) == 0) {
900 /*
901 * A non-zero number was not detected; treat as defined zero.
902 */
903 sign = '1';
904 log_ten = 0;
905 digits[0] = '0';
906 j = 1;
907 }
908
909 /*
910 * We subtract a constant from the log of negative values so that
911 * they will correctly precede positive values with a zero logarithm.
912 */
913 if (sign == '0') {
914 if (j != 0)
915 log_ten = -log_ten - 2;
916 else
917 /*
918 * Special case for -0.
919 */
920 log_ten = -1;
921 }
922
923 buffer[0] = sign;
924
925 /*
926 * Place logarithm in big-endian form.
927 */
928 for (i = 0; i < sizeof (int); i++)
929 buffer[i + 1] = (log_ten << (i * NBBY))
930 >> ((sizeof (int) - 1) * NBBY);
931
932 if (j + sizeof (char) + sizeof (int) <
933 L->l_collate_bufsize - coll_offset)
934 return (j + 1 + sizeof (int));
935 else
936 return ((ssize_t)-1);
937 }
938
939 /*ARGSUSED*/
940 ssize_t
field_convert_numeric_wide(field_t * F,line_rec_t * L,vchar_t delimiter,ssize_t data_offset,ssize_t data_length,ssize_t coll_offset)941 field_convert_numeric_wide(field_t *F, line_rec_t *L, vchar_t delimiter,
942 ssize_t data_offset, ssize_t data_length, ssize_t coll_offset)
943 {
944 wchar_t *number;
945 wchar_t *buffer = L->l_collate.wp + coll_offset;
946 char *lbuffer;
947 ssize_t length;
948
949 wchar_t sign = L'2';
950 int log_ten;
951 wchar_t *digits = buffer + 1 + sizeof (int)/sizeof (wchar_t);
952 size_t j = 0;
953 size_t i;
954
955 int state = BEFORE_NUMBER;
956
957 number = L->l_data.wp + data_offset;
958 length = data_length;
959
960 for (i = 0; i < length; i++)
961 if (!W_IS_BLANK(number[i]))
962 break;
963
964 if (((length - i) * sizeof (wchar_t) + 4 * sizeof (wchar_t) +
965 sizeof (int)) > (L->l_collate_bufsize - coll_offset))
966 return ((ssize_t)-1);
967
968 if (number[i] == L'-') {
969 i++;
970 sign = L'0';
971 }
972
973 for (; i < length; i++) {
974 if (W_IS_SEPARATOR(number[i]))
975 continue;
976
977 if (number[i] == L'0' && !(state & IN_NUMBER))
978 continue;
979
980 if (!iswdigit(number[i]))
981 break;
982
983 state |= IN_NUMBER;
984 if (sign == L'0')
985 digits[j++] = L'0' + L'9' - number[i];
986 else
987 digits[j++] = number[i];
988 }
989
990 if (i < length && W_IS_DECIMAL(number[i])) {
991 digits[j] = W_DECIMAL_CHAR;
992 log_ten = j++;
993
994 for (++i; i < length; i++) {
995 if (W_IS_SEPARATOR(number[i]))
996 continue;
997
998 if (!iswdigit(number[i]))
999 break;
1000
1001 if (number[i] != L'0')
1002 state |= IN_NUMBER;
1003
1004 if (sign == L'0')
1005 digits[j++] = L'0' + L'9' - number[i];
1006 else
1007 digits[j++] = number[i];
1008 }
1009
1010 if (sign == L'0')
1011 digits[j++] = (wchar_t)(WCHAR_MAX -
1012 W_INTERFIELD_SEPARATOR);
1013 } else {
1014 log_ten = (int)j;
1015 if (sign == L'0')
1016 digits[j++] = (wchar_t)(WCHAR_MAX -
1017 W_INTERFIELD_SEPARATOR);
1018 else
1019 digits[j] = W_INTERFIELD_SEPARATOR;
1020 }
1021
1022 if ((state & IN_NUMBER) == 0) {
1023 sign = L'1';
1024 log_ten = 0;
1025 digits[0] = L'0';
1026 j = 1;
1027 }
1028
1029 if (sign == L'0') {
1030 if (j != 0)
1031 log_ten = -log_ten - 2;
1032 else
1033 log_ten = -1;
1034 }
1035
1036 buffer[0] = sign;
1037 /*
1038 * Place logarithm in big-endian form.
1039 */
1040 lbuffer = (char *)(buffer + 1);
1041 for (i = 0; i < sizeof (int); i++)
1042 lbuffer[i] = (log_ten << (i * NBBY))
1043 >> ((sizeof (int) - 1) * NBBY);
1044
1045 if ((j + 1 + sizeof (int)/sizeof (wchar_t)) * sizeof (wchar_t) <
1046 L->l_collate_bufsize - coll_offset * sizeof (wchar_t))
1047 return (j + 1 + sizeof (int) / sizeof (wchar_t));
1048 else
1049 return ((ssize_t)-1);
1050 }
1051
1052 /*
1053 * flags contains one of CV_REALLOC, CV_FAIL, specifying the preferred behaviour
1054 * when coll_offset exceeds l_collate_bufsize.
1055 */
1056 ssize_t
field_convert(field_t * F,line_rec_t * L,int flags,vchar_t field_separator)1057 field_convert(field_t *F, line_rec_t *L, int flags, vchar_t field_separator)
1058 {
1059 ssize_t coll_offset = 0;
1060 ssize_t start, end, distance;
1061 field_t *cur_fieldp = F;
1062
1063 while (cur_fieldp != NULL) {
1064 /*
1065 * delimit field
1066 */
1067 if (!field_separator.sc)
1068 field_delimit(cur_fieldp, L, &start, &end);
1069 else
1070 field_delimit_tabbed(cur_fieldp, L, &start, &end,
1071 field_separator);
1072
1073 distance = 0;
1074 if (end - start > 0 ||
1075 (end - start == 0 && F->f_species == NUMERIC)) {
1076 /*
1077 * Convert field, appending to collated field of line
1078 * record.
1079 */
1080 distance = cur_fieldp->f_convert(cur_fieldp, L,
1081 field_separator, start, end - start, coll_offset);
1082
1083 /*
1084 * branch should execute comparatively rarely
1085 */
1086 if (distance == -1) {
1087 if (flags & FCV_REALLOC) {
1088 ASSERT(L->l_collate_bufsize > 0);
1089 L->l_collate_bufsize *= 2;
1090 L->l_collate.sp =
1091 safe_realloc(L->l_collate.sp,
1092 L->l_collate_bufsize);
1093
1094 __S(stats_incr_convert_reallocs());
1095 continue;
1096 } else {
1097 /*
1098 * FCV_FAIL has been set.
1099 */
1100 return (-1);
1101 }
1102 }
1103 }
1104
1105 if (cur_fieldp->f_options & FIELD_REVERSE_COMPARISONS) {
1106 xstrninv(L->l_collate.sp, coll_offset, distance);
1107 *(L->l_collate.sp + coll_offset + distance) =
1108 (char)(UCHAR_MAX - INTERFIELD_SEPARATOR);
1109 distance++;
1110 }
1111
1112 ASSERT(distance >= 0);
1113 coll_offset += distance;
1114 if (coll_offset >= L->l_collate_bufsize) {
1115 if (flags & FCV_REALLOC) {
1116 ASSERT(L->l_collate_bufsize > 0);
1117 L->l_collate_bufsize *= 2;
1118 L->l_collate.sp = safe_realloc(L->l_collate.sp,
1119 L->l_collate_bufsize);
1120
1121 __S(stats_incr_convert_reallocs());
1122 } else {
1123 return (-1);
1124 }
1125 }
1126 *(L->l_collate.sp + coll_offset) = INTERFIELD_SEPARATOR;
1127 coll_offset++;
1128
1129 cur_fieldp = cur_fieldp->f_next;
1130 }
1131
1132 L->l_collate_length = coll_offset;
1133
1134 return (L->l_collate_length);
1135 }
1136
1137 ssize_t
field_convert_wide(field_t * F,line_rec_t * L,int flags,vchar_t field_separator)1138 field_convert_wide(field_t *F, line_rec_t *L, int flags,
1139 vchar_t field_separator)
1140 {
1141 ssize_t coll_offset = 0;
1142 ssize_t start, end, distance;
1143 field_t *cur_fieldp = F;
1144
1145 while (cur_fieldp != NULL) {
1146 if (!field_separator.wc)
1147 field_delimit_wide(cur_fieldp, L, &start, &end);
1148 else
1149 field_delimit_tabbed_wide(cur_fieldp, L, &start, &end,
1150 field_separator);
1151
1152 distance = 0;
1153 if (end - start > 0 ||
1154 end - start == 0 && F->f_species == NUMERIC) {
1155 distance = cur_fieldp->f_convert(cur_fieldp, L,
1156 field_separator, start, end - start, coll_offset);
1157
1158 if (distance == -1) {
1159 if (flags & FCV_REALLOC) {
1160 ASSERT(L->l_collate_bufsize > 0);
1161 L->l_collate_bufsize *= 2;
1162 L->l_collate.wp = safe_realloc(
1163 L->l_collate.wp,
1164 L->l_collate_bufsize);
1165
1166 __S(stats_incr_convert_reallocs());
1167 continue;
1168 } else {
1169 return (-1);
1170 }
1171 }
1172 }
1173
1174 if (cur_fieldp->f_options & FIELD_REVERSE_COMPARISONS) {
1175 xwcsninv(L->l_collate.wp, coll_offset, distance);
1176 *(L->l_collate.wp + coll_offset + distance) =
1177 WCHAR_MAX - INTERFIELD_SEPARATOR;
1178 distance++;
1179 }
1180
1181 ASSERT(distance >= 0);
1182 coll_offset += distance;
1183 if (coll_offset * sizeof (wchar_t) >= L->l_collate_bufsize) {
1184 if (flags & FCV_REALLOC) {
1185 ASSERT(L->l_collate_bufsize > 0);
1186 L->l_collate_bufsize *= 2;
1187 L->l_collate.wp = safe_realloc(L->l_collate.wp,
1188 L->l_collate_bufsize);
1189
1190 __S(stats_incr_convert_reallocs());
1191 } else {
1192 return (-1);
1193 }
1194 }
1195 *(L->l_collate.wp + coll_offset) = W_INTERFIELD_SEPARATOR;
1196 coll_offset++;
1197
1198 cur_fieldp = cur_fieldp->f_next;
1199 }
1200
1201 L->l_collate_length = coll_offset * sizeof (wchar_t);
1202 #ifdef _LITTLE_ENDIAN
1203 xwcsntomsb(L->l_collate.wp, coll_offset);
1204 #endif /* _LITTLE_ENDIAN */
1205
1206 return (L->l_collate_length);
1207 }
1208
1209 /*
1210 * line_convert() and line_convert_wide() are called when the collation vector
1211 * of a given line has been exhausted, and we are performing the final,
1212 * full-line comparison required by the sort specification. Because we do not
1213 * have a guarantee that l_data is null-terminated, we create an explicitly
1214 * null-terminated copy suitable for transformation to a collatable form for the
1215 * current locale.
1216 */
1217 static void
line_convert(line_rec_t * L)1218 line_convert(line_rec_t *L)
1219 {
1220 static ssize_t bufsize;
1221 static char *buffer;
1222
1223 if (L->l_raw_collate.sp != NULL)
1224 return;
1225
1226 if (L->l_data_length + 1 > bufsize) {
1227 buffer = safe_realloc(buffer, L->l_data_length + 1);
1228 bufsize = L->l_data_length + 1;
1229 }
1230
1231 (void) strncpy(buffer, L->l_data.sp, L->l_data_length);
1232 buffer[L->l_data_length] = '\0';
1233
1234 L->l_raw_collate.sp = safe_realloc(L->l_raw_collate.sp,
1235 xfrm_ops->sx_len(buffer, L->l_data_length) + 1);
1236 xfrm_ops->sx_xfrm(L->l_raw_collate.sp, buffer,
1237 xfrm_ops->sx_len(buffer, L->l_data_length) + 1);
1238
1239 __S(stats_incr_line_conversions());
1240 }
1241
1242 static void
line_convert_wide(line_rec_t * L)1243 line_convert_wide(line_rec_t *L)
1244 {
1245 static wchar_t *buffer;
1246 static ssize_t bufsize;
1247
1248 ssize_t dlength;
1249
1250 if (L->l_raw_collate.wp != NULL)
1251 return;
1252
1253 if (L->l_data_length + 1 > bufsize) {
1254 buffer = safe_realloc(buffer, (L->l_data_length + 1) *
1255 sizeof (wchar_t));
1256 bufsize = L->l_data_length + 1;
1257 }
1258
1259 (void) wcsncpy(buffer, L->l_data.wp, L->l_data_length);
1260 buffer[L->l_data_length] = L'\0';
1261
1262 dlength = wcsxfrm(NULL, buffer, 0) + 1;
1263 L->l_raw_collate.wp = safe_realloc(L->l_raw_collate.wp, dlength *
1264 sizeof (wchar_t));
1265 (void) wcsxfrm(L->l_raw_collate.wp, buffer, dlength);
1266
1267 __S(stats_incr_line_conversions());
1268 }
1269
1270 /*
1271 * Our convention for collation is
1272 *
1273 * A > B => r > 0,
1274 * A == B => r = 0,
1275 * A < B => r < 0
1276 *
1277 * This convention is consistent with the definition of memcmp(), strcmp(), and
1278 * strncmp() in the C locale. collated() and collated_wide() have two optional
1279 * behaviours, which can be activated by setting the appropriate values in
1280 * coll_flag: COLL_UNIQUE, which returns 0 if the l_collate fields of the line
1281 * records being compared are identical; COLL_DATA_ONLY, which ignores the
1282 * l_collate field for the current comparison; and COLL_REVERSE, which flips the
1283 * result for comparisons that fall through to an actual data comparison (since
1284 * the collated vector should already reflect reverse ordering from field
1285 * conversion).
1286 */
1287 int
collated(line_rec_t * A,line_rec_t * B,ssize_t depth,flag_t coll_flag)1288 collated(line_rec_t *A, line_rec_t *B, ssize_t depth, flag_t coll_flag)
1289 {
1290 ssize_t ml = MIN(A->l_collate_length, B->l_collate_length) - depth;
1291 int r;
1292 int mask = (coll_flag & COLL_REVERSE) ? INT_SIGN_FLIP_MASK :
1293 INT_SIGN_PASS_MASK;
1294 ssize_t la, lb;
1295
1296 if (!(coll_flag & COLL_DATA_ONLY)) {
1297 if (ml > 0) {
1298 r = memcmp(A->l_collate.sp + depth,
1299 B->l_collate.sp + depth, ml);
1300
1301 if (r)
1302 return (r);
1303 }
1304
1305 if (A->l_collate_length < B->l_collate_length)
1306 return (-1);
1307
1308 if (A->l_collate_length > B->l_collate_length)
1309 return (1);
1310 }
1311
1312 /*
1313 * This is where we cut out, if we know that the current sort is over
1314 * the entire line.
1315 */
1316 if (coll_flag & COLL_UNIQUE)
1317 return (0);
1318
1319 line_convert(A);
1320 line_convert(B);
1321
1322 la = strlen(A->l_raw_collate.sp);
1323 lb = strlen(B->l_raw_collate.sp);
1324
1325 r = memcmp(A->l_raw_collate.sp, B->l_raw_collate.sp, MIN(la, lb));
1326
1327 if (r)
1328 return (r ^ mask);
1329
1330 if (la < lb)
1331 return (-1 ^ mask);
1332
1333 if (la > lb)
1334 return (1 ^ mask);
1335
1336 return (0);
1337 }
1338
1339 int
collated_wide(line_rec_t * A,line_rec_t * B,ssize_t depth,flag_t coll_flag)1340 collated_wide(line_rec_t *A, line_rec_t *B, ssize_t depth, flag_t coll_flag)
1341 {
1342 ssize_t ml = MIN(A->l_collate_length, B->l_collate_length) - depth;
1343 int r;
1344 int mask = (coll_flag & COLL_REVERSE) ? INT_SIGN_FLIP_MASK :
1345 INT_SIGN_PASS_MASK;
1346 ssize_t la, lb;
1347
1348 if (!(coll_flag & COLL_DATA_ONLY)) {
1349 if (ml > 0) {
1350 r = memcmp(A->l_collate.sp + depth,
1351 B->l_collate.sp + depth, ml);
1352
1353 if (r)
1354 return (r);
1355 }
1356 if (A->l_collate_length < B->l_collate_length)
1357 return (-1);
1358
1359 if (A->l_collate_length > B->l_collate_length)
1360 return (1);
1361 }
1362
1363 if (coll_flag & COLL_UNIQUE)
1364 return (0);
1365
1366 line_convert_wide(A);
1367 line_convert_wide(B);
1368
1369 la = wcslen(A->l_raw_collate.wp);
1370 lb = wcslen(B->l_raw_collate.wp);
1371
1372 r = wmemcmp(A->l_raw_collate.wp, B->l_raw_collate.wp,
1373 (size_t)MIN(la, lb));
1374
1375 if (r)
1376 return (r ^ mask);
1377
1378 if (la < lb)
1379 return (-1 ^ mask);
1380
1381 if (la > lb)
1382 return (1 ^ mask);
1383
1384 return (0);
1385 }
1386