xref: /illumos-gate/usr/src/uts/common/sys/kiconv.h (revision 88e55da9244bc48e3b3ad957a29e4be71309adcd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _SYS_KICONV_H
27 #define	_SYS_KICONV_H
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 #include <sys/types.h>
34 
35 #ifdef	_KERNEL
36 
/*
 * One entry of the supported fromcode/tocode list: a codeset name paired
 * with its numeric id. The id values of a fromcode and a tocode are what is
 * used to find out the corresponding code conversion.
 */
typedef struct {
	char	*name;	/* fromcode/tocode name */
	size_t	id;	/* id paired with that name */
} kiconv_code_list_t;
46 
/*
 * A single kiconv code conversion, uniquely identified by its tocode and
 * fromcode ids. Each such conversion has a corresponding module id and
 * internal function pointers to open(), kiconv(), close(), and kiconvstr().
 */
typedef struct {
	uint16_t	tid;	/* tocode id */
	uint16_t	fid;	/* fromcode id */
	uint16_t	mid;	/* module id */

	/* Internal function pointers for this conversion. */
	void	*(*open)(void);
	size_t	(*kiconv)(void *, char **, size_t *, char **, size_t *, int *);
	int	(*close)(void *);
	size_t	(*kiconvstr)(char *, size_t *, char *, size_t *, int, int *);
} kiconv_conv_list_t;
63 
64 /*
65  * Each module id has a corresponding module name that is used to load
66  * the module as needed and a reference counter.
67  */
68 typedef struct {
69 	char		*name;
70 	uint_t		refcount;
71 } kiconv_mod_list_t;
72 
/*
 * kiconv_ops_t is one of the two data structures (kiconv_module_info_t is
 * the other) used to transfer information on the supported kiconv code
 * conversions from a module to the framework.
 *
 * Details can be found in the kiconv_ops(9S) and kiconv_module_info(9S)
 * man pages at PSARC/2007/173.
 */
typedef struct {
	char	*tocode;
	char	*fromcode;
	void	*(*kiconv_open)(void);
	size_t	(*kiconv)(void *, char **, size_t *, char **, size_t *, int *);
	int	(*kiconv_close)(void *);
	size_t	(*kiconvstr)(char *, size_t *, char *, size_t *, int, int *);
} kiconv_ops_t;
90 
91 typedef struct kiconv_mod_info {
92 	char		*module_name;
93 	size_t		kiconv_num_convs;
94 	kiconv_ops_t	*kiconv_ops_tbl;
95 	size_t		kiconv_num_aliases;
96 	char		**aliases;
97 	char		**canonicals;
98 	int		nowait;
99 } kiconv_module_info_t;
100 
/*
 * The kiconv code conversion descriptor data structure; kiconv_t is the
 * pointer-to-descriptor type.
 */
typedef struct {
	void	*handle;	/* handle from the actual open() */
	size_t	id;		/* index to the conv_list[] */
} kiconv_data_t, *kiconv_t;
106 
/*
 * Common conversion state data structure; kiconv_state_t is the
 * pointer-to-state type.
 */
typedef struct {
	uint8_t	id;
	uint8_t	bom_processed;	/* NOTE(review): looks like a BOM-seen flag */
} kiconv_state_data_t, *kiconv_state_t;
112 
113 /* Common component types for possible code conversion mapping tables. */
114 typedef struct {
115 	uchar_t		u8[3];
116 } kiconv_to_utf8_tbl_comp_t;
117 
/*
 * Mapping table component that packs a 24-bit u8 value and an 8-bit sb
 * value into uint32_t bit-fields.
 */
typedef struct {
	uint32_t	u8 : 24;
	uint32_t	sb : 8;
} kiconv_to_sb_tbl_comp_t;
122 
/*
 * Upper bound on the name length of any given codeset or alias name; this
 * should be plenty big enough.
 */
#define	KICONV_MAX_CODENAME_LEN		(63)
128 
/*
 * Evaluates to nonzero when (c) is one of '-', '_', '.', or '@'; these
 * characters do not exist in the normalized code names.
 *
 * The argument can be evaluated more than once, so it must be free of
 * side effects.
 */
#define	KICONV_SKIPPABLE_CHAR(c)	\
	((c) == '-' ||			\
	(c) == '_' ||			\
	(c) == '.' ||			\
	(c) == '@')
132 
/*
 * As iconv(3C) does, when we encounter a non-identical character we map it
 * into one of the following replacement characters, chosen based on what
 * the current target tocode is.
 *
 * 0xefbfbd is the UTF-8 byte sequence (0xef 0xbf 0xbd) of the Unicode
 * scalar value U+FFFD.
 */
#define	KICONV_ASCII_REPLACEMENT_CHAR	('?')
#define	KICONV_UTF8_REPLACEMENT_CHAR	(0xefbfbd)
142 
/*
 * Numeric ids for kiconv modules. The ids are consecutive, starting from
 * KICONV_EMBEDDED at zero.
 */
#define	KICONV_EMBEDDED		(0)
#define	KICONV_MODULE_ID_JA	(1)
#define	KICONV_MODULE_ID_SC	(2)
#define	KICONV_MODULE_ID_KO	(3)
#define	KICONV_MODULE_ID_TC	(4)
#define	KICONV_MODULE_ID_EMEA	(5)

/* The highest module id defined above. */
#define	KICONV_MAX_MODULE_ID	KICONV_MODULE_ID_EMEA
152 
153 /* Functions used in kiconv conversion and module management. */
154 extern void	kiconv_init();
155 extern int	kiconv_register_module(kiconv_module_info_t *);
156 extern int	kiconv_unregister_module(kiconv_module_info_t *);
157 extern size_t	kiconv_module_ref_count(size_t);
158 
159 #endif	/* _KERNEL */
160 
161 #ifdef __cplusplus
162 }
163 #endif
164 
165 #endif /* _SYS_KICONV_H */
166