xref: /freebsd/lib/libc/string/strcoll.c (revision 2a6abeebef961038d455abfcfcda14c16aec5b52)
158f0484fSRodney W. Grimes /*-
2*2a6abeebSBaptiste Daroussin  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3a4d5d0cbSAndrey A. Chernov  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
4a4d5d0cbSAndrey A. Chernov  *		at Electronni Visti IA, Kiev, Ukraine.
5a4d5d0cbSAndrey A. Chernov  *			All rights reserved.
658f0484fSRodney W. Grimes  *
73c87aa1dSDavid Chisnall  * Copyright (c) 2011 The FreeBSD Foundation
83c87aa1dSDavid Chisnall  * All rights reserved.
93c87aa1dSDavid Chisnall  * Portions of this software were developed by David Chisnall
103c87aa1dSDavid Chisnall  * under sponsorship from the FreeBSD Foundation.
113c87aa1dSDavid Chisnall  *
1258f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1358f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1458f0484fSRodney W. Grimes  * are met:
1558f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1658f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1758f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1858f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1958f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
2058f0484fSRodney W. Grimes  *
21a4d5d0cbSAndrey A. Chernov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
2258f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2358f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24a4d5d0cbSAndrey A. Chernov  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
2558f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2658f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2758f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2858f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2958f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3058f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3158f0484fSRodney W. Grimes  * SUCH DAMAGE.
3258f0484fSRodney W. Grimes  */
3358f0484fSRodney W. Grimes 
34de5fe5d5SDavid E. O'Brien #include <sys/cdefs.h>
35de5fe5d5SDavid E. O'Brien __FBSDID("$FreeBSD$");
365864b79cSDavid E. O'Brien 
37a4d5d0cbSAndrey A. Chernov #include <stdlib.h>
3858f0484fSRodney W. Grimes #include <string.h>
39*2a6abeebSBaptiste Daroussin #include <errno.h>
40*2a6abeebSBaptiste Daroussin #include <wchar.h>
41a4d5d0cbSAndrey A. Chernov #include "collate.h"
4258f0484fSRodney W. Grimes 
433c87aa1dSDavid Chisnall 
44*2a6abeebSBaptiste Daroussin /*
45*2a6abeebSBaptiste Daroussin  * In order to properly handle multibyte locales, its easiet to just
46*2a6abeebSBaptiste Daroussin  * convert to wide characters and then use wcscoll.  However if an
47*2a6abeebSBaptiste Daroussin  * error occurs, we gracefully fall back to simple strcmp.  Caller
48*2a6abeebSBaptiste Daroussin  * should check errno.
49*2a6abeebSBaptiste Daroussin  */
5058f0484fSRodney W. Grimes int
510f701093SEitan Adler strcoll_l(const char *s, const char *s2, locale_t locale)
5258f0484fSRodney W. Grimes {
53*2a6abeebSBaptiste Daroussin 	int ret;
54*2a6abeebSBaptiste Daroussin 	wchar_t *t1 = NULL, *t2 = NULL;
55*2a6abeebSBaptiste Daroussin 	wchar_t *w1 = NULL, *w2 = NULL;
56*2a6abeebSBaptiste Daroussin 	const char *cs1, *cs2;
57*2a6abeebSBaptiste Daroussin 	mbstate_t mbs1;
58*2a6abeebSBaptiste Daroussin 	mbstate_t mbs2;
59*2a6abeebSBaptiste Daroussin 	size_t sz1, sz2;
60*2a6abeebSBaptiste Daroussin 
61*2a6abeebSBaptiste Daroussin 	memset(&mbs1, 0, sizeof (mbstate_t));
62*2a6abeebSBaptiste Daroussin 	memset(&mbs2, 0, sizeof (mbstate_t));
63*2a6abeebSBaptiste Daroussin 
64*2a6abeebSBaptiste Daroussin 	/*
65*2a6abeebSBaptiste Daroussin 	 * The mbsrtowcs_l function can set the src pointer to null upon
66*2a6abeebSBaptiste Daroussin 	 * failure, so it should act on a copy to avoid:
67*2a6abeebSBaptiste Daroussin 	 *   - sending null pointer to strcmp
68*2a6abeebSBaptiste Daroussin 	 *   - having strcoll/strcoll_l change *s or *s2 to null
69*2a6abeebSBaptiste Daroussin 	 */
70*2a6abeebSBaptiste Daroussin 	cs1 = s;
71*2a6abeebSBaptiste Daroussin 	cs2 = s2;
72*2a6abeebSBaptiste Daroussin 
733c87aa1dSDavid Chisnall 	FIX_LOCALE(locale);
743c87aa1dSDavid Chisnall 	struct xlocale_collate *table =
753c87aa1dSDavid Chisnall 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
76a4d5d0cbSAndrey A. Chernov 
773c87aa1dSDavid Chisnall 	if (table->__collate_load_error)
78*2a6abeebSBaptiste Daroussin 		goto error;
79a4d5d0cbSAndrey A. Chernov 
80*2a6abeebSBaptiste Daroussin 	sz1 = strlen(s) + 1;
81*2a6abeebSBaptiste Daroussin 	sz2 = strlen(s2) + 1;
82a4d5d0cbSAndrey A. Chernov 
83*2a6abeebSBaptiste Daroussin 	/*
84*2a6abeebSBaptiste Daroussin 	 * Simple assumption: conversion to wide format is strictly
85*2a6abeebSBaptiste Daroussin 	 * reducing, i.e. a single byte (or multibyte character)
86*2a6abeebSBaptiste Daroussin 	 * cannot result in multiple wide characters.
87*2a6abeebSBaptiste Daroussin 	 */
88*2a6abeebSBaptiste Daroussin 	if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
89*2a6abeebSBaptiste Daroussin 		goto error;
90*2a6abeebSBaptiste Daroussin 	w1 = t1;
91*2a6abeebSBaptiste Daroussin 	if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
92*2a6abeebSBaptiste Daroussin 		goto error;
93*2a6abeebSBaptiste Daroussin 	w2 = t2;
94*2a6abeebSBaptiste Daroussin 
95*2a6abeebSBaptiste Daroussin 	if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
96*2a6abeebSBaptiste Daroussin 		goto error;
97*2a6abeebSBaptiste Daroussin 
98*2a6abeebSBaptiste Daroussin 	if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
99*2a6abeebSBaptiste Daroussin 		goto error;
100*2a6abeebSBaptiste Daroussin 
101*2a6abeebSBaptiste Daroussin 	ret = wcscoll_l(w1, w2, locale);
102*2a6abeebSBaptiste Daroussin 	if (t1)
103*2a6abeebSBaptiste Daroussin 		free(t1);
104*2a6abeebSBaptiste Daroussin 	if (t2)
105*2a6abeebSBaptiste Daroussin 		free(t2);
106*2a6abeebSBaptiste Daroussin 
107*2a6abeebSBaptiste Daroussin 	return (ret);
108*2a6abeebSBaptiste Daroussin 
109*2a6abeebSBaptiste Daroussin error:
110*2a6abeebSBaptiste Daroussin 	if (t1)
111*2a6abeebSBaptiste Daroussin 		free(t1);
112*2a6abeebSBaptiste Daroussin 	if (t2)
113*2a6abeebSBaptiste Daroussin 		free(t2);
114*2a6abeebSBaptiste Daroussin 	return (strcmp(s, s2));
11558f0484fSRodney W. Grimes }
1163c87aa1dSDavid Chisnall 
1173c87aa1dSDavid Chisnall int
1180f701093SEitan Adler strcoll(const char *s, const char *s2)
1193c87aa1dSDavid Chisnall {
1200f701093SEitan Adler 	return strcoll_l(s, s2, __get_locale());
1213c87aa1dSDavid Chisnall }
1223c87aa1dSDavid Chisnall 
123