xref: /freebsd/lib/libc/locale/collate.h (revision b74a756a9f537c6da41d721075f69216160a08a6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
6  *		at Electronni Visti IA, Kiev, Ukraine.
7  *			All rights reserved.
8  *
9  * Copyright (c) 2011 The FreeBSD Foundation
10  *
11  * Portions of this software were developed by David Chisnall
12  * under sponsorship from the FreeBSD Foundation.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef _COLLATE_H_
37 #define	_COLLATE_H_
38 
39 #include <sys/types.h>
40 #include <limits.h>
41 #include <wchar.h>
42 #include "xlocale_private.h"
43 
/*
 * Work around buildworld bootstrapping from older systems whose limits.h
 * sets COLL_WEIGHTS_MAX to 0.
 *
 * A missing definition is handled the same way.  The !defined() test is
 * spelled out so the check does not depend on the preprocessor quietly
 * evaluating an unknown identifier as 0 (which also trips -Wundef).
 */
#if !defined(COLL_WEIGHTS_MAX) || COLL_WEIGHTS_MAX == 0
#undef COLL_WEIGHTS_MAX
#define COLL_WEIGHTS_MAX 10
#endif
52 
/*
 * Number of elements in the fixed-size arrays collate_chain.str and
 * collate_subst.pri.
 */
#define	COLLATE_STR_LEN		24		/* should be 64-bit multiple */

/*
 * Size of the fmt_version field at the start of a collate file, and the
 * string that field must hold.
 */
#define	COLLATE_FMT_VERSION_LEN	12
#define	COLLATE_FMT_VERSION	"BSD 1.0\n"

/* Distinguished values in the signed 32-bit priority space. */
#define	COLLATE_MAX_PRIORITY	(0x7fffffff)	/* max signed value */
#define	COLLATE_SUBST_PRIORITY	(0x40000000)	/* bit indicates subst table */

/*
 * Directive flag bits.  Presumably these are the values stored in
 * collate_info.directive[] -- confirm against the table loader.
 */
#define	DIRECTIVE_UNDEF		0x00
#define	DIRECTIVE_FORWARD	0x01
#define	DIRECTIVE_BACKWARD	0x02
#define	DIRECTIVE_POSITION	0x04
#define	DIRECTIVE_UNDEFINED	0x08	/* special last weight for UNDEFINED */

/* Selects only the two direction bits of a directive value. */
#define	DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)

/* NOTE(review): not referenced in this header; meaning is set by its users. */
#define	IGNORE_EQUIV_CLASS 1
70 
/*
 * The collate file format is as follows:
 *
 * char	fmt_version[COLLATE_FMT_VERSION_LEN];	// must be COLLATE_FMT_VERSION
 * char	def_version[XLOCALE_DEF_VERSION_LEN];	// NUL-terminated, may be empty
 * collate_info_t	info;			// see below, includes padding
 * collate_char_t	char_data[256];		// 8 bit char values
 * collate_subst_t	subst[*];		// 0 or more substitutions
 * collate_chain_t	chains[*];		// 0 or more chains
 * collate_large_t	large[*];		// extended char priorities
 *
 * Note that all structures must be 32-bit aligned, as each structure
 * contains 32-bit member fields.  The entire file is mmap'd, so it's
 * critical that alignment be observed.  It is not generally safe to
 * use any 64-bit values in the structures.
 */
87 
88 typedef struct collate_info {
89 	uint8_t directive_count;
90 	uint8_t directive[COLL_WEIGHTS_MAX];
91 	uint8_t chain_max_len; /* In padding */
92 	int32_t pri_count[COLL_WEIGHTS_MAX];
93 	int32_t flags;
94 	int32_t chain_count;
95 	int32_t large_count;
96 	int32_t subst_count[COLL_WEIGHTS_MAX];
97 	int32_t undef_pri[COLL_WEIGHTS_MAX];
98 } collate_info_t;
99 
100 typedef struct collate_char {
101 	int32_t pri[COLL_WEIGHTS_MAX];
102 } collate_char_t;
103 
104 typedef struct collate_chain {
105 	wchar_t str[COLLATE_STR_LEN];
106 	int32_t pri[COLL_WEIGHTS_MAX];
107 } collate_chain_t;
108 
109 typedef struct collate_large {
110 	int32_t val;
111 	collate_char_t pri;
112 } collate_large_t;
113 
114 typedef struct collate_subst {
115 	int32_t key;
116 	int32_t pri[COLLATE_STR_LEN];
117 } collate_subst_t;
118 
119 struct xlocale_collate {
120 	struct xlocale_component header;
121 	int __collate_load_error;
122 	char * map;
123 	size_t maplen;
124 
125 	collate_info_t	*info;
126 	collate_char_t	*char_pri_table;
127 	collate_large_t	*large_pri_table;
128 	collate_chain_t	*chain_pri_table;
129 	collate_subst_t	*subst_table[COLL_WEIGHTS_MAX];
130 };
131 
132 __BEGIN_DECLS
133 size_t	__collate_collating_symbol(wchar_t *, size_t, const char *, size_t,
134     mbstate_t *);
135 int	__collate_equiv_class(const char *, size_t, mbstate_t *);
136 int	__collate_equiv_value(locale_t, const wchar_t *, size_t);
137 size_t	__collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *,
138     size_t, mbstate_t *, size_t *);
139 int	__collate_load_tables(const char *);
140 void	_collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
141 	int, const int **);
142 int	__collate_range_cmp(char, char);
143 int	__wcollate_range_cmp(wchar_t, wchar_t);
144 size_t	_collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
145 	size_t);
146 size_t	_collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,
147 	size_t);
148 __END_DECLS
149 
150 #endif /* !_COLLATE_H_ */
151