xref: /illumos-gate/usr/src/lib/libc/port/locale/collate.h (revision 5422785d352a2bb398daceab3d1898a8aa64d006)
1 /*
2  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
5  *		at Electronni Visti IA, Kiev, Ukraine.
6  *			All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifndef _COLLATE_H_
31 #define	_COLLATE_H_
32 
33 #include <sys/types.h>
34 #include <limits.h>
35 #include <locale.h>
36 #include "localeimpl.h"
37 
38 #define	COLLATE_STR_LEN		24		/* size of version[], chain str[] and subst pri[]; should be 64-bit multiple */
39 #define	COLLATE_VERSION		"IllumosCollate2\n"	/* required contents of the file's version[] field */
40 
41 #define	COLLATE_MAX_PRIORITY	(0x7fffffff)	/* max signed value (largest positive int32_t) */
42 #define	COLLATE_SUBST_PRIORITY	(0x40000000)	/* bit indicates subst table; NOTE(review): confirm exact use in the implementation */
43 
44 #define	DIRECTIVE_UNDEF		0x00	/* no directive bits set */
45 #define	DIRECTIVE_FORWARD	0x01	/* presumably the "forward" ordering directive -- confirm */
46 #define	DIRECTIVE_BACKWARD	0x02	/* presumably the "backward" ordering directive -- confirm */
47 #define	DIRECTIVE_POSITION	0x04	/* presumably the "position" ordering directive -- confirm */
48 #define	DIRECTIVE_UNDEFINED	0x08	/* special last weight for UNDEFINED */
49 
50 #define	DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)	/* selects only the two direction bits */
51 
52 /*
53  * The collate file format is as follows:
54  *
55  * char		version[COLLATE_STR_LEN];	// must be COLLATE_VERSION
56  * collate_info_t	info;			// see below, includes padding
57  * collate_char_t	char_data[256];		// 8 bit char values
58  * collate_subst_t	subst[*];		// 0 or more substitutions
59  * collate_chain_t	chains[*];		// 0 or more chains
60  * collate_large_t	large[*];		// extended char priorities
61  *
62  * Note that all structures must be 32-bit aligned, as each structure
63  * contains 32-bit member fields.  The entire file is mmap'd, so it's
64  * critical that alignment be observed.  It is not generally safe to
65  * use any 64-bit values in the structures.
66  */
67 
68 typedef struct collate_info {	/* "info" record of the collate file, stored right after version[] */
69 	uint8_t directive_count;	/* presumably number of directive[] entries in use -- confirm */
70 	uint8_t directive[COLL_WEIGHTS_MAX];	/* one byte per weight slot; presumably DIRECTIVE_* bits -- confirm */
71 	int32_t pri_count[COLL_WEIGHTS_MAX];	/* one count per weight slot; exact meaning not visible in this header */
72 	int32_t flags;	/* flag bits; values are not defined in this header */
73 	int32_t chain_count;	/* presumably number of collate_chain_t records in the file -- confirm */
74 	int32_t large_count;	/* presumably number of collate_large_t records in the file -- confirm */
75 	int32_t subst_count[COLL_WEIGHTS_MAX];	/* presumably number of collate_subst_t records per weight slot -- confirm */
76 	int32_t undef_pri[COLL_WEIGHTS_MAX];	/* per-slot priority; presumably used for UNDEFINED -- confirm */
77 } collate_info_t;
78 
79 typedef struct collate_char {	/* set of priorities for a single character */
80 	int32_t pri[COLL_WEIGHTS_MAX];	/* one 32-bit priority per weight slot */
81 } collate_char_t;
82 
83 typedef struct collate_chain {	/* one "chain" record: a wide-character string and its priorities */
84 	wchar_t str[COLLATE_STR_LEN];	/* the chain's characters; presumably NUL-terminated when shorter -- confirm */
85 	int32_t pri[COLL_WEIGHTS_MAX];	/* one 32-bit priority per weight slot */
86 } collate_chain_t;
87 
88 typedef struct collate_large {	/* one "extended char priorities" record */
89 	int32_t val;	/* presumably the character code this record applies to -- confirm */
90 	collate_char_t pri;	/* priorities for that value, one per weight slot */
91 } collate_large_t;
92 
93 typedef struct collate_subst {	/* one substitution record */
94 	int32_t key;	/* presumably the value looked up to find this record -- confirm */
95 	int32_t pri[COLLATE_STR_LEN];	/* up to COLLATE_STR_LEN replacement priorities; termination rule not visible here */
96 } collate_subst_t;
97 
98 struct lc_collate {	/* collation state handed to the _collate_* routines declared in this header */
99 	int		lc_is_posix;	/* presumably nonzero for the POSIX/C locale -- confirm */
100 
101 	uint8_t		lc_directive_count;	/* lc_* members from here to lc_undef_pri parallel collate_info_t */
102 	uint8_t		lc_directive[COLL_WEIGHTS_MAX];	/* parallels collate_info_t.directive */
103 	int32_t		lc_pri_count[COLL_WEIGHTS_MAX];	/* parallels collate_info_t.pri_count */
104 	int32_t		lc_flags;	/* parallels collate_info_t.flags */
105 	int32_t		lc_chain_count;	/* parallels collate_info_t.chain_count */
106 	int32_t		lc_large_count;	/* parallels collate_info_t.large_count */
107 	int32_t		lc_subst_count[COLL_WEIGHTS_MAX];	/* parallels collate_info_t.subst_count */
108 	int32_t		lc_undef_pri[COLL_WEIGHTS_MAX];	/* parallels collate_info_t.undef_pri */
109 
110 	collate_info_t	*lc_info;	/* info record; presumably points into the mmap'd file -- confirm */
111 	collate_char_t	*lc_char_table;	/* per-character priority records */
112 	collate_large_t	*lc_large_table;	/* extended character records */
113 	collate_chain_t	*lc_chain_table;	/* chain records */
114 	collate_subst_t	*lc_subst_table[COLL_WEIGHTS_MAX];	/* one substitution table pointer per weight slot */
115 };
116 
117 void	_collate_lookup(const struct lc_collate *, const wchar_t *,
118     int *, int *, int, const int **);	/* NOTE(review): roles of the int and pointer arguments are not visible here; see the implementation */
119 size_t	_collate_wxfrm(const struct lc_collate *, const wchar_t *,
120     wchar_t *, size_t);	/* presumably transforms a wide string into a wide-character buffer of the given size -- confirm */
121 size_t	_collate_sxfrm(const wchar_t *, char *, size_t, locale_t);	/* presumably as above but into a char buffer, for the given locale -- confirm */
122 int	_collate_range_cmp(wchar_t, wchar_t, locale_t);	/* compares two wide characters under the given locale; result convention not visible here */
123 
123 
124 #endif /* !_COLLATE_H_ */
125