xref: /titanic_44/usr/src/lib/libc/port/locale/collate.h (revision 2d08521bd15501c8370ba2153b9cca4f094979d0)
/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systmes, Inc.  All rights reserved.
 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
 *		at Electronni Visti IA, Kiev, Ukraine.
 *			All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
294297a3b0SGarrett D'Amore 
304297a3b0SGarrett D'Amore #ifndef _COLLATE_H_
314297a3b0SGarrett D'Amore #define	_COLLATE_H_
324297a3b0SGarrett D'Amore 
334297a3b0SGarrett D'Amore #include <sys/types.h>
344297a3b0SGarrett D'Amore #include <limits.h>
35*2d08521bSGarrett D'Amore #include <locale.h>
36*2d08521bSGarrett D'Amore #include "localeimpl.h"
374297a3b0SGarrett D'Amore 
/*
 * Size, in elements, of the fixed-length string fields in the collation
 * data: the version header, collate_chain_t.str and collate_subst_t.pri.
 */
#define	COLLATE_STR_LEN		24		/* should be 64-bit multiple */

/* Version tag that the start of a collation file must carry. */
#define	COLLATE_VERSION		"IllumosCollate2\n"

#define	COLLATE_MAX_PRIORITY	(0x7fffffff)	/* max signed value */
#define	COLLATE_SUBST_PRIORITY	(0x40000000)	/* bit indicates subst table */

/*
 * Directive flag bits.  FORWARD and BACKWARD are the two direction bits
 * selected by DIRECTIVE_DIRECTION_MASK; POSITION and UNDEFINED are
 * independent bits that lie outside that mask.
 */
#define	DIRECTIVE_UNDEF		0x00
#define	DIRECTIVE_FORWARD	0x01
#define	DIRECTIVE_BACKWARD	0x02
#define	DIRECTIVE_POSITION	0x04
#define	DIRECTIVE_UNDEFINED	0x08	/* special last weight for UNDEFINED */

#define	DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
514297a3b0SGarrett D'Amore 
/*
 * The collate file format is as follows:
 *
 * char		version[COLLATE_STR_LEN];	// must be COLLATE_VERSION
 * collate_info_t	info;			// see below, includes padding
 * collate_char_t	char_data[256];		// 8 bit char values
 * collate_subst_t	subst[*];		// 0 or more substitutions
 * collate_chain_t	chains[*];		// 0 or more chains
 * collate_large_t	large[*];		// extended char priorities
 *
 * Note that all structures must be 32-bit aligned, as each structure
 * contains 32-bit member fields.  The entire file is mmap'd, so it's
 * critical that alignment be observed.  It is not generally safe to
 * use any 64-bit values in the structures.
 */
676b5e5868SGarrett D'Amore 
/*
 * Header record of the collation data (the "info" record in the file-format
 * description).  Each array sized by COLL_WEIGHTS_MAX has one slot per
 * collation weight level.
 *
 * The leading uint8_t members are followed by int32_t members, so the
 * compiler may insert alignment padding after directive[]; because the file
 * is mapped directly, that padding is part of the stored record.  Do not
 * reorder or resize members.
 *
 * NOTE(review): judging by the names, directive[] presumably holds
 * DIRECTIVE_* bits and the *_count members presumably size the
 * corresponding tables -- confirm against the loader.
 */
typedef struct collate_info {
	uint8_t	directive_count;
	uint8_t	directive[COLL_WEIGHTS_MAX];
	int32_t	pri_count[COLL_WEIGHTS_MAX];
	int32_t	flags;
	int32_t	chain_count;
	int32_t	large_count;
	int32_t	subst_count[COLL_WEIGHTS_MAX];
	int32_t	undef_pri[COLL_WEIGHTS_MAX];
} collate_info_t;
786b5e5868SGarrett D'Amore 
/*
 * Priority record for a single character: one 32-bit priority for each
 * of the COLL_WEIGHTS_MAX weight levels.  Also embedded in
 * collate_large_t.
 */
typedef struct collate_char {
	int32_t	pri[COLL_WEIGHTS_MAX];
} collate_char_t;
826b5e5868SGarrett D'Amore 
83723fee08SGarrett D'Amore typedef struct collate_chain {
846b5e5868SGarrett D'Amore 	wchar_t str[COLLATE_STR_LEN];
856b5e5868SGarrett D'Amore 	int32_t pri[COLL_WEIGHTS_MAX];
86723fee08SGarrett D'Amore } collate_chain_t;
876b5e5868SGarrett D'Amore 
88723fee08SGarrett D'Amore typedef struct collate_large {
896b5e5868SGarrett D'Amore 	int32_t val;
90723fee08SGarrett D'Amore 	collate_char_t pri;
91723fee08SGarrett D'Amore } collate_large_t;
926b5e5868SGarrett D'Amore 
936b5e5868SGarrett D'Amore typedef struct collate_subst {
946b5e5868SGarrett D'Amore 	int32_t key;
956b5e5868SGarrett D'Amore 	int32_t pri[COLLATE_STR_LEN];
966b5e5868SGarrett D'Amore } collate_subst_t;
976b5e5868SGarrett D'Amore 
98*2d08521bSGarrett D'Amore struct lc_collate {
99*2d08521bSGarrett D'Amore 	int		lc_is_posix;
1006b5e5868SGarrett D'Amore 
101*2d08521bSGarrett D'Amore 	uint8_t		lc_directive_count;
102*2d08521bSGarrett D'Amore 	uint8_t		lc_directive[COLL_WEIGHTS_MAX];
103*2d08521bSGarrett D'Amore 	int32_t		lc_pri_count[COLL_WEIGHTS_MAX];
104*2d08521bSGarrett D'Amore 	int32_t		lc_flags;
105*2d08521bSGarrett D'Amore 	int32_t		lc_chain_count;
106*2d08521bSGarrett D'Amore 	int32_t		lc_large_count;
107*2d08521bSGarrett D'Amore 	int32_t		lc_subst_count[COLL_WEIGHTS_MAX];
108*2d08521bSGarrett D'Amore 	int32_t		lc_undef_pri[COLL_WEIGHTS_MAX];
109*2d08521bSGarrett D'Amore 
110*2d08521bSGarrett D'Amore 	collate_info_t	*lc_info;
111*2d08521bSGarrett D'Amore 	collate_char_t	*lc_char_table;
112*2d08521bSGarrett D'Amore 	collate_large_t	*lc_large_table;
113*2d08521bSGarrett D'Amore 	collate_chain_t	*lc_chain_table;
114*2d08521bSGarrett D'Amore 	collate_subst_t	*lc_subst_table[COLL_WEIGHTS_MAX];
115*2d08521bSGarrett D'Amore };
116*2d08521bSGarrett D'Amore 
117*2d08521bSGarrett D'Amore void	_collate_lookup(const struct lc_collate *, const wchar_t *,
118*2d08521bSGarrett D'Amore     int *, int *, int, const int **);
119*2d08521bSGarrett D'Amore size_t	_collate_wxfrm(const struct lc_collate *, const wchar_t *,
120*2d08521bSGarrett D'Amore     wchar_t *, size_t);
121*2d08521bSGarrett D'Amore size_t	_collate_sxfrm(const wchar_t *, char *, size_t, locale_t);
122*2d08521bSGarrett D'Amore int	_collate_range_cmp(wchar_t, wchar_t, locale_t);
1234297a3b0SGarrett D'Amore 
1244297a3b0SGarrett D'Amore #endif /* !_COLLATE_H_ */
125