xref: /freebsd/usr.bin/sort/radixsort.c (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <errno.h>
32 #include <err.h>
33 #include <langinfo.h>
34 #include <math.h>
35 #if defined(SORT_THREADS)
36 #include <pthread.h>
37 #include <semaphore.h>
38 #endif
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43 #include <unistd.h>
44 
45 #include "coll.h"
46 #include "radixsort.h"
47 
48 #define DEFAULT_SORT_FUNC_RADIXSORT mergesort
49 
50 #define TINY_NODE(sl) ((sl)->tosort_num < 65)
51 #define SMALL_NODE(sl) ((sl)->tosort_num < 5)
52 
53 /* are we sorting in reverse order ? */
54 static bool reverse_sort;
55 
56 /* sort sub-levels array size */
57 static const size_t slsz = 256 * sizeof(struct sort_level*);
58 
59 /* one sort level structure */
60 struct sort_level
61 {
62 	struct sort_level	**sublevels;
63 	struct sort_list_item	**leaves;
64 	struct sort_list_item	**sorted;
65 	struct sort_list_item	**tosort;
66 	size_t			  leaves_num;
67 	size_t			  leaves_sz;
68 	size_t			  level;
69 	size_t			  real_sln;
70 	size_t			  start_position;
71 	size_t			  sln;
72 	size_t			  tosort_num;
73 	size_t			  tosort_sz;
74 };
75 
76 /* stack of sort levels ready to be sorted */
77 struct level_stack {
78 	struct level_stack	 *next;
79 	struct sort_level	 *sl;
80 };
81 
82 static struct level_stack *g_ls;
83 
84 #if defined(SORT_THREADS)
85 /* stack guarding mutex */
86 static pthread_cond_t g_ls_cond;
87 static pthread_mutex_t g_ls_mutex;
88 
89 /* counter: how many items are left */
90 static size_t sort_left;
91 /* guarding mutex */
92 
93 /* semaphore to count threads */
94 static sem_t mtsem;
95 
96 /*
97  * Decrement items counter
98  */
99 static inline void
100 sort_left_dec(size_t n)
101 {
102 	pthread_mutex_lock(&g_ls_mutex);
103 	sort_left -= n;
104 	if (sort_left == 0 && nthreads > 1)
105 		pthread_cond_broadcast(&g_ls_cond);
106 	pthread_mutex_unlock(&g_ls_mutex);
107 }
108 
109 /*
110  * Do we have something to sort ?
111  *
112  * This routine does not need to be locked.
113  */
114 static inline bool
115 have_sort_left(void)
116 {
117 	bool ret;
118 
119 	ret = (sort_left > 0);
120 
121 	return (ret);
122 }
123 
124 #else
125 
126 #define sort_left_dec(n)
127 
128 #endif /* SORT_THREADS */
129 
130 static void
131 _push_ls(struct level_stack *ls)
132 {
133 
134 	ls->next = g_ls;
135 	g_ls = ls;
136 }
137 
138 /*
139  * Push sort level to the stack
140  */
141 static inline void
142 push_ls(struct sort_level *sl)
143 {
144 	struct level_stack *new_ls;
145 
146 	new_ls = sort_malloc(sizeof(struct level_stack));
147 	new_ls->sl = sl;
148 
149 #if defined(SORT_THREADS)
150 	if (nthreads > 1) {
151 		pthread_mutex_lock(&g_ls_mutex);
152 		_push_ls(new_ls);
153 		pthread_cond_signal(&g_ls_cond);
154 		pthread_mutex_unlock(&g_ls_mutex);
155 	} else
156 #endif
157 		_push_ls(new_ls);
158 }
159 
160 /*
161  * Pop sort level from the stack (single-threaded style)
162  */
163 static inline struct sort_level*
164 pop_ls_st(void)
165 {
166 	struct sort_level *sl;
167 
168 	if (g_ls) {
169 		struct level_stack *saved_ls;
170 
171 		sl = g_ls->sl;
172 		saved_ls = g_ls;
173 		g_ls = g_ls->next;
174 		sort_free(saved_ls);
175 	} else
176 		sl = NULL;
177 
178 	return (sl);
179 }
180 
181 #if defined(SORT_THREADS)
182 
183 /*
184  * Pop sort level from the stack (multi-threaded style)
185  */
186 static inline struct sort_level*
187 pop_ls_mt(void)
188 {
189 	struct level_stack *saved_ls;
190 	struct sort_level *sl;
191 
192 	pthread_mutex_lock(&g_ls_mutex);
193 
194 	for (;;) {
195 		if (g_ls) {
196 			sl = g_ls->sl;
197 			saved_ls = g_ls;
198 			g_ls = g_ls->next;
199 			break;
200 		}
201 		sl = NULL;
202 		saved_ls = NULL;
203 
204 		if (have_sort_left() == 0)
205 			break;
206 		pthread_cond_wait(&g_ls_cond, &g_ls_mutex);
207 	}
208 
209 	pthread_mutex_unlock(&g_ls_mutex);
210 
211 	sort_free(saved_ls);
212 
213 	return (sl);
214 }
215 
216 #endif /* defined(SORT_THREADS) */
217 
218 static void
219 add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
220 {
221 	struct sort_level *ssl;
222 
223 	ssl = sl->sublevels[indx];
224 
225 	if (ssl == NULL) {
226 		ssl = sort_calloc(1, sizeof(struct sort_level));
227 
228 		ssl->level = sl->level + 1;
229 		sl->sublevels[indx] = ssl;
230 
231 		++(sl->real_sln);
232 	}
233 
234 	if (++(ssl->tosort_num) > ssl->tosort_sz) {
235 		ssl->tosort_sz = ssl->tosort_num + 128;
236 		ssl->tosort = sort_realloc(ssl->tosort,
237 		    sizeof(struct sort_list_item*) * (ssl->tosort_sz));
238 	}
239 
240 	ssl->tosort[ssl->tosort_num - 1] = item;
241 }
242 
243 static inline void
244 add_leaf(struct sort_level *sl, struct sort_list_item *item)
245 {
246 
247 	if (++(sl->leaves_num) > sl->leaves_sz) {
248 		sl->leaves_sz = sl->leaves_num + 128;
249 		sl->leaves = sort_realloc(sl->leaves,
250 		    (sizeof(struct sort_list_item*) * (sl->leaves_sz)));
251 	}
252 	sl->leaves[sl->leaves_num - 1] = item;
253 }
254 
255 static inline int
256 get_wc_index(struct sort_list_item *sli, size_t level)
257 {
258 	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
259 	const struct key_value *kv;
260 	const struct bwstring *bws;
261 
262 	kv = get_key_from_keys_array(&sli->ka, 0);
263 	bws = kv->k;
264 
265 	if ((BWSLEN(bws) * wcfact > level)) {
266 		wchar_t res;
267 
268 		/*
269 		 * Sort wchar strings a byte at a time, rather than a single
270 		 * byte from each wchar.
271 		 */
272 		res = (wchar_t)BWS_GET(bws, level / wcfact);
273 		/* Sort most-significant byte first. */
274 		if (level % wcfact < wcfact - 1)
275 			res = (res >> (8 * (wcfact - 1 - (level % wcfact))));
276 
277 		return (res & 0xff);
278 	}
279 
280 	return (-1);
281 }
282 
283 static void
284 place_item(struct sort_level *sl, size_t item)
285 {
286 	struct sort_list_item *sli;
287 	int c;
288 
289 	sli = sl->tosort[item];
290 	c = get_wc_index(sli, sl->level);
291 
292 	if (c == -1)
293 		add_leaf(sl, sli);
294 	else
295 		add_to_sublevel(sl, sli, c);
296 }
297 
298 static void
299 free_sort_level(struct sort_level *sl)
300 {
301 
302 	if (sl) {
303 		if (sl->leaves)
304 			sort_free(sl->leaves);
305 
306 		if (sl->level > 0)
307 			sort_free(sl->tosort);
308 
309 		if (sl->sublevels) {
310 			struct sort_level *slc;
311 			size_t sln;
312 
313 			sln = sl->sln;
314 
315 			for (size_t i = 0; i < sln; ++i) {
316 				slc = sl->sublevels[i];
317 				if (slc)
318 					free_sort_level(slc);
319 			}
320 
321 			sort_free(sl->sublevels);
322 		}
323 
324 		sort_free(sl);
325 	}
326 }
327 
328 static void
329 run_sort_level_next(struct sort_level *sl)
330 {
331 	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
332 	struct sort_level *slc;
333 	size_t i, sln, tosort_num;
334 
335 	if (sl->sublevels) {
336 		sort_free(sl->sublevels);
337 		sl->sublevels = NULL;
338 	}
339 
340 	switch (sl->tosort_num) {
341 	case 0:
342 		goto end;
343 	case (1):
344 		sl->sorted[sl->start_position] = sl->tosort[0];
345 		sort_left_dec(1);
346 		goto end;
347 	case (2):
348 		/*
349 		 * Radixsort only processes a single byte at a time.  In wchar
350 		 * mode, this can be a subset of the length of a character.
351 		 * list_coll_offset() offset is in units of wchar, not bytes.
352 		 * So to calculate the offset, we must divide by
353 		 * sizeof(wchar_t) and round down to the index of the first
354 		 * character this level references.
355 		 */
356 		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
357 		    sl->level / wcfact) > 0) {
358 			sl->sorted[sl->start_position++] = sl->tosort[1];
359 			sl->sorted[sl->start_position] = sl->tosort[0];
360 		} else {
361 			sl->sorted[sl->start_position++] = sl->tosort[0];
362 			sl->sorted[sl->start_position] = sl->tosort[1];
363 		}
364 		sort_left_dec(2);
365 
366 		goto end;
367 	default:
368 		if (TINY_NODE(sl) || (sl->level > 15)) {
369 			listcoll_t func;
370 
371 			/*
372 			 * Collate comparison offset is in units of
373 			 * character-width, so we must divide the level (bytes)
374 			 * by operating character width (wchar_t or char).  See
375 			 * longer comment above.
376 			 */
377 			func = get_list_call_func(sl->level / wcfact);
378 
379 			sl->leaves = sl->tosort;
380 			sl->leaves_num = sl->tosort_num;
381 			sl->leaves_sz = sl->leaves_num;
382 			sl->leaves = sort_realloc(sl->leaves,
383 			    (sizeof(struct sort_list_item *) *
384 			    (sl->leaves_sz)));
385 			sl->tosort = NULL;
386 			sl->tosort_num = 0;
387 			sl->tosort_sz = 0;
388 			sl->sln = 0;
389 			sl->real_sln = 0;
390 			if (sort_opts_vals.sflag) {
391 				if (mergesort(sl->leaves, sl->leaves_num,
392 				    sizeof(struct sort_list_item *),
393 				    (int(*)(const void *, const void *)) func) == -1)
394 					/* NOTREACHED */
395 					err(2, "Radix sort error 3");
396 			} else
397 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
398 				    sizeof(struct sort_list_item *),
399 				    (int(*)(const void *, const void *)) func);
400 
401 			memcpy(sl->sorted + sl->start_position,
402 			    sl->leaves, sl->leaves_num *
403 			    sizeof(struct sort_list_item*));
404 
405 			sort_left_dec(sl->leaves_num);
406 
407 			goto end;
408 		} else {
409 			sl->tosort_sz = sl->tosort_num;
410 			sl->tosort = sort_realloc(sl->tosort,
411 			    sizeof(struct sort_list_item*) * (sl->tosort_sz));
412 		}
413 	}
414 
415 	sl->sln = 256;
416 	sl->sublevels = sort_calloc(1, slsz);
417 
418 	sl->real_sln = 0;
419 
420 	tosort_num = sl->tosort_num;
421 	for (i = 0; i < tosort_num; ++i)
422 		place_item(sl, i);
423 
424 	sort_free(sl->tosort);
425 	sl->tosort = NULL;
426 	sl->tosort_num = 0;
427 	sl->tosort_sz = 0;
428 
429 	if (sl->leaves_num > 1) {
430 		if (keys_num > 1) {
431 			if (sort_opts_vals.sflag) {
432 				mergesort(sl->leaves, sl->leaves_num,
433 				    sizeof(struct sort_list_item *),
434 				    (int(*)(const void *, const void *)) list_coll);
435 			} else {
436 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
437 				    sizeof(struct sort_list_item *),
438 				    (int(*)(const void *, const void *)) list_coll);
439 			}
440 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
441 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
442 			    sizeof(struct sort_list_item *),
443 			    (int(*)(const void *, const void *)) list_coll_by_str_only);
444 		}
445 	}
446 
447 	sl->leaves_sz = sl->leaves_num;
448 	sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) *
449 	    (sl->leaves_sz)));
450 
451 	if (!reverse_sort) {
452 		memcpy(sl->sorted + sl->start_position, sl->leaves,
453 		    sl->leaves_num * sizeof(struct sort_list_item*));
454 		sl->start_position += sl->leaves_num;
455 		sort_left_dec(sl->leaves_num);
456 
457 		sort_free(sl->leaves);
458 		sl->leaves = NULL;
459 		sl->leaves_num = 0;
460 		sl->leaves_sz = 0;
461 
462 		sln = sl->sln;
463 
464 		for (i = 0; i < sln; ++i) {
465 			slc = sl->sublevels[i];
466 
467 			if (slc) {
468 				slc->sorted = sl->sorted;
469 				slc->start_position = sl->start_position;
470 				sl->start_position += slc->tosort_num;
471 				if (SMALL_NODE(slc))
472 					run_sort_level_next(slc);
473 				else
474 					push_ls(slc);
475 				sl->sublevels[i] = NULL;
476 			}
477 		}
478 
479 	} else {
480 		size_t n;
481 
482 		sln = sl->sln;
483 
484 		for (i = 0; i < sln; ++i) {
485 			n = sln - i - 1;
486 			slc = sl->sublevels[n];
487 
488 			if (slc) {
489 				slc->sorted = sl->sorted;
490 				slc->start_position = sl->start_position;
491 				sl->start_position += slc->tosort_num;
492 				if (SMALL_NODE(slc))
493 					run_sort_level_next(slc);
494 				else
495 					push_ls(slc);
496 				sl->sublevels[n] = NULL;
497 			}
498 		}
499 
500 		memcpy(sl->sorted + sl->start_position, sl->leaves,
501 		    sl->leaves_num * sizeof(struct sort_list_item*));
502 		sort_left_dec(sl->leaves_num);
503 	}
504 
505 end:
506 	free_sort_level(sl);
507 }
508 
509 /*
510  * Single-threaded sort cycle
511  */
512 static void
513 run_sort_cycle_st(void)
514 {
515 	struct sort_level *slc;
516 
517 	for (;;) {
518 		slc = pop_ls_st();
519 		if (slc == NULL) {
520 			break;
521 		}
522 		run_sort_level_next(slc);
523 	}
524 }
525 
526 #if defined(SORT_THREADS)
527 
528 /*
529  * Multi-threaded sort cycle
530  */
531 static void
532 run_sort_cycle_mt(void)
533 {
534 	struct sort_level *slc;
535 
536 	for (;;) {
537 		slc = pop_ls_mt();
538 		if (slc == NULL)
539 			break;
540 		run_sort_level_next(slc);
541 	}
542 }
543 
544 /*
545  * Sort cycle thread (in multi-threaded mode)
546  */
547 static void*
548 sort_thread(void* arg)
549 {
550 	run_sort_cycle_mt();
551 	sem_post(&mtsem);
552 
553 	return (arg);
554 }
555 
556 #endif /* defined(SORT_THREADS) */
557 
558 static void
559 run_top_sort_level(struct sort_level *sl)
560 {
561 	struct sort_level *slc;
562 
563 	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
564 	    default_sort_mods->rflag;
565 
566 	sl->start_position = 0;
567 	sl->sln = 256;
568 	sl->sublevels = sort_calloc(1, slsz);
569 
570 	for (size_t i = 0; i < sl->tosort_num; ++i)
571 		place_item(sl, i);
572 
573 	if (sl->leaves_num > 1) {
574 		if (keys_num > 1) {
575 			if (sort_opts_vals.sflag) {
576 				mergesort(sl->leaves, sl->leaves_num,
577 				    sizeof(struct sort_list_item *),
578 				    (int(*)(const void *, const void *)) list_coll);
579 			} else {
580 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
581 				    sizeof(struct sort_list_item *),
582 				    (int(*)(const void *, const void *)) list_coll);
583 			}
584 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
585 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
586 			    sizeof(struct sort_list_item *),
587 			    (int(*)(const void *, const void *)) list_coll_by_str_only);
588 		}
589 	}
590 
591 	if (!reverse_sort) {
592 		memcpy(sl->tosort + sl->start_position, sl->leaves,
593 		    sl->leaves_num * sizeof(struct sort_list_item*));
594 		sl->start_position += sl->leaves_num;
595 		sort_left_dec(sl->leaves_num);
596 
597 		for (size_t i = 0; i < sl->sln; ++i) {
598 			slc = sl->sublevels[i];
599 
600 			if (slc) {
601 				slc->sorted = sl->tosort;
602 				slc->start_position = sl->start_position;
603 				sl->start_position += slc->tosort_num;
604 				push_ls(slc);
605 				sl->sublevels[i] = NULL;
606 			}
607 		}
608 
609 	} else {
610 		size_t n;
611 
612 		for (size_t i = 0; i < sl->sln; ++i) {
613 
614 			n = sl->sln - i - 1;
615 			slc = sl->sublevels[n];
616 
617 			if (slc) {
618 				slc->sorted = sl->tosort;
619 				slc->start_position = sl->start_position;
620 				sl->start_position += slc->tosort_num;
621 				push_ls(slc);
622 				sl->sublevels[n] = NULL;
623 			}
624 		}
625 
626 		memcpy(sl->tosort + sl->start_position, sl->leaves,
627 		    sl->leaves_num * sizeof(struct sort_list_item*));
628 
629 		sort_left_dec(sl->leaves_num);
630 	}
631 
632 #if defined(SORT_THREADS)
633 	if (nthreads < 2) {
634 #endif
635 		run_sort_cycle_st();
636 #if defined(SORT_THREADS)
637 	} else {
638 		size_t i;
639 
640 		for(i = 0; i < nthreads; ++i) {
641 			pthread_attr_t attr;
642 			pthread_t pth;
643 
644 			pthread_attr_init(&attr);
645 			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
646 
647 			for (;;) {
648 				int res = pthread_create(&pth, &attr,
649 				    sort_thread, NULL);
650 				if (res >= 0)
651 					break;
652 				if (errno == EAGAIN) {
653 					pthread_yield();
654 					continue;
655 				}
656 				err(2, NULL);
657 			}
658 
659 			pthread_attr_destroy(&attr);
660 		}
661 
662 		for (i = 0; i < nthreads; ++i)
663 			sem_wait(&mtsem);
664 	}
665 #endif /* defined(SORT_THREADS) */
666 }
667 
668 static void
669 run_sort(struct sort_list_item **base, size_t nmemb)
670 {
671 	struct sort_level *sl;
672 
673 #if defined(SORT_THREADS)
674 	size_t nthreads_save = nthreads;
675 	if (nmemb < MT_SORT_THRESHOLD)
676 		nthreads = 1;
677 
678 	if (nthreads > 1) {
679 		pthread_mutexattr_t mattr;
680 
681 		pthread_mutexattr_init(&mattr);
682 		pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP);
683 
684 		pthread_mutex_init(&g_ls_mutex, &mattr);
685 		pthread_cond_init(&g_ls_cond, NULL);
686 
687 		pthread_mutexattr_destroy(&mattr);
688 
689 		sem_init(&mtsem, 0, 0);
690 
691 	}
692 #endif
693 
694 	sl = sort_calloc(1, sizeof(struct sort_level));
695 
696 	sl->tosort = base;
697 	sl->tosort_num = nmemb;
698 	sl->tosort_sz = nmemb;
699 
700 #if defined(SORT_THREADS)
701 	sort_left = nmemb;
702 #endif
703 
704 	run_top_sort_level(sl);
705 
706 	free_sort_level(sl);
707 
708 #if defined(SORT_THREADS)
709 	if (nthreads > 1) {
710 		sem_destroy(&mtsem);
711 		pthread_mutex_destroy(&g_ls_mutex);
712 	}
713 	nthreads = nthreads_save;
714 #endif
715 }
716 
717 void
718 rxsort(struct sort_list_item **base, size_t nmemb)
719 {
720 
721 	run_sort(base, nmemb);
722 }
723