xref: /freebsd/lib/libkiconv/quirks.c (revision 5e53a4f90f82c4345f277dd87cc9292f26e04a29)
1c4f02a89SMax Khon /*-
2*5e53a4f9SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*5e53a4f9SPedro F. Giffuni  *
4c4f02a89SMax Khon  * Copyright (c) 2003 Ryuichiro Imura
5c4f02a89SMax Khon  * All rights reserved.
6c4f02a89SMax Khon  *
7c4f02a89SMax Khon  * Redistribution and use in source and binary forms, with or without
8c4f02a89SMax Khon  * modification, are permitted provided that the following conditions
9c4f02a89SMax Khon  * are met:
10c4f02a89SMax Khon  * 1. Redistributions of source code must retain the above copyright
11c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer.
12c4f02a89SMax Khon  * 2. Redistributions in binary form must reproduce the above copyright
13c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer in the
14c4f02a89SMax Khon  *    documentation and/or other materials provided with the distribution.
15c4f02a89SMax Khon  *
16c4f02a89SMax Khon  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17c4f02a89SMax Khon  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18c4f02a89SMax Khon  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19c4f02a89SMax Khon  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20c4f02a89SMax Khon  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21c4f02a89SMax Khon  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22c4f02a89SMax Khon  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23c4f02a89SMax Khon  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24c4f02a89SMax Khon  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25c4f02a89SMax Khon  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26c4f02a89SMax Khon  * SUCH DAMAGE.
27c4f02a89SMax Khon  *
28c4f02a89SMax Khon  * $FreeBSD$
29c4f02a89SMax Khon  */
30c4f02a89SMax Khon 
/*
 * kiconv(3) requires shared linking; the quirk code below is left out
 * to reduce module size when statically linked.
 */
35c4f02a89SMax Khon 
36294246bbSEd Maste #ifdef PIC
37c4f02a89SMax Khon 
/*
 * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
 *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the
 * eucJP/Unicode mapping rule will cause a problem. That's why we
 * define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
 *
 * Apple Mac OS also has its own Unicode mappings,
 * but we won't require those quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode, which cannot be
 * handled by this xlat16 converter.
 */
56c4f02a89SMax Khon 
57c4f02a89SMax Khon #include <sys/types.h>
58c4f02a89SMax Khon #include <sys/iconv.h>
59c4f02a89SMax Khon 
60c4f02a89SMax Khon #include <stdio.h>
61c4f02a89SMax Khon #include <string.h>
62c4f02a89SMax Khon 
63c4f02a89SMax Khon #include "quirks.h"
64c4f02a89SMax Khon 
65c4f02a89SMax Khon /*
66c4f02a89SMax Khon  * All lists of quirk character set
67c4f02a89SMax Khon  */
68c4f02a89SMax Khon static struct {
69c4f02a89SMax Khon 	int vendor; /* reserved for non MS mapping */
70c4f02a89SMax Khon 	const char *base_codeset, *quirk_codeset;
71c4f02a89SMax Khon } quirk_list[] = {
72c4f02a89SMax Khon 	{ KICONV_VENDOR_MICSFT,	"eucJP", "eucJP-ms" },
73c4f02a89SMax Khon 	{ KICONV_VENDOR_MICSFT,	"EUC-JP", "eucJP-ms" },
74c4f02a89SMax Khon 	{ KICONV_VENDOR_MICSFT,	"SJIS", "SJIS-ms" },
75c4f02a89SMax Khon 	{ KICONV_VENDOR_MICSFT,	"Shift_JIS", "SJIS-ms" },
76c4f02a89SMax Khon 	{ KICONV_VENDOR_MICSFT,	"Big5", "Big5-ms" }
77c4f02a89SMax Khon };
78c4f02a89SMax Khon 
79c4f02a89SMax Khon /*
80c4f02a89SMax Khon  * The character list to replace for Japanese MS-Windows.
81c4f02a89SMax Khon  */
82c4f02a89SMax Khon static struct quirk_replace_list quirk_jis_cp932[] = {
83c4f02a89SMax Khon 	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
84c4f02a89SMax Khon 	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
85c4f02a89SMax Khon 	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
86c4f02a89SMax Khon 	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
87c4f02a89SMax Khon 	{ 0x203e, 0x007e }, /* Overline, Tilde */
88c4f02a89SMax Khon 	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
89c4f02a89SMax Khon 	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
90c4f02a89SMax Khon };
91c4f02a89SMax Khon 
92c4f02a89SMax Khon /*
93c4f02a89SMax Khon  * All entries of quirks
94c4f02a89SMax Khon  */
95c4f02a89SMax Khon #define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))
96c4f02a89SMax Khon static struct {
97c4f02a89SMax Khon 	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
98c4f02a89SMax Khon 	struct quirk_replace_list (*replace_list)[];
99c4f02a89SMax Khon 	size_t num_of_replaces;
100c4f02a89SMax Khon } quirk_table[] = {
101c4f02a89SMax Khon 	{
102c4f02a89SMax Khon 		"eucJP-ms", "eucJP", ENCODING_UNICODE,
103c4f02a89SMax Khon 		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
104c4f02a89SMax Khon 		NumOf(quirk_jis_cp932)
105c4f02a89SMax Khon 	},
106c4f02a89SMax Khon 	{
107c4f02a89SMax Khon 		"SJIS-ms", "CP932", ENCODING_UNICODE,
108c4f02a89SMax Khon 		/* XXX - quirk_replace_list should be NULL */
109c4f02a89SMax Khon 		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
110c4f02a89SMax Khon 		NumOf(quirk_jis_cp932)
111c4f02a89SMax Khon 	},
112c4f02a89SMax Khon 	{
113c4f02a89SMax Khon 		"Big5-ms", "CP950", ENCODING_UNICODE,
114c4f02a89SMax Khon 		NULL, 0
115c4f02a89SMax Khon 	}
116c4f02a89SMax Khon };
117c4f02a89SMax Khon 
118c4f02a89SMax Khon 
119c4f02a89SMax Khon const char *
120c4f02a89SMax Khon kiconv_quirkcs(const char* base, int vendor)
121c4f02a89SMax Khon {
122c4f02a89SMax Khon 	size_t i;
123c4f02a89SMax Khon 
124c4f02a89SMax Khon 	/*
125c4f02a89SMax Khon 	 * We should compare codeset names ignoring case here,
126c4f02a89SMax Khon 	 * so that quirk could be used for all of the user input
127c4f02a89SMax Khon 	 * patterns.
128c4f02a89SMax Khon 	 */
129c4f02a89SMax Khon 	for (i = 0; i < NumOf(quirk_list); i++)
130c4f02a89SMax Khon 		if (quirk_list[i].vendor == vendor &&
131c4f02a89SMax Khon 		    strcasecmp(quirk_list[i].base_codeset, base) == 0)
132c4f02a89SMax Khon 			return (quirk_list[i].quirk_codeset);
133c4f02a89SMax Khon 
134c4f02a89SMax Khon 	return (base);
135c4f02a89SMax Khon }
136c4f02a89SMax Khon 
137c4f02a89SMax Khon /*
138c4f02a89SMax Khon  * Internal Functions
139c4f02a89SMax Khon  */
140c4f02a89SMax Khon const char *
141c4f02a89SMax Khon search_quirk(const char *given_codeset,
142c4f02a89SMax Khon 	     const char *pair_codeset,
143c4f02a89SMax Khon 	     struct quirk_replace_list **replace_list,
144c4f02a89SMax Khon 	     size_t *num_of_replaces)
145c4f02a89SMax Khon {
146c4f02a89SMax Khon 	size_t i;
147c4f02a89SMax Khon 
148c4f02a89SMax Khon 	*replace_list = NULL;
149c4f02a89SMax Khon 	*num_of_replaces = 0;
150c4f02a89SMax Khon 	for (i = 0; i < NumOf(quirk_table); i++)
151c4f02a89SMax Khon 		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) {
152c4f02a89SMax Khon 			if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
153c4f02a89SMax Khon 				*replace_list = *quirk_table[i].replace_list;
154c4f02a89SMax Khon 				*num_of_replaces = quirk_table[i].num_of_replaces;
155c4f02a89SMax Khon 			}
156c4f02a89SMax Khon 			return (quirk_table[i].iconv_codeset);
157c4f02a89SMax Khon 		}
158c4f02a89SMax Khon 
159c4f02a89SMax Khon 	return (given_codeset);
160c4f02a89SMax Khon }
161c4f02a89SMax Khon 
162c4f02a89SMax Khon uint16_t
163c4f02a89SMax Khon quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
164c4f02a89SMax Khon {
165c4f02a89SMax Khon 	size_t i;
166c4f02a89SMax Khon 
167c4f02a89SMax Khon 	for (i = 0; i < num; i++)
168c4f02a89SMax Khon 		if (replace_list[i].vendor_code == c)
169c4f02a89SMax Khon 			return (replace_list[i].standard_code);
170c4f02a89SMax Khon 
171c4f02a89SMax Khon 	return (c);
172c4f02a89SMax Khon }
173c4f02a89SMax Khon 
174c4f02a89SMax Khon uint16_t
175c4f02a89SMax Khon quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
176c4f02a89SMax Khon {
177c4f02a89SMax Khon 	size_t i;
178c4f02a89SMax Khon 
179c4f02a89SMax Khon 	for (i = 0; i < num; i++)
180c4f02a89SMax Khon 		if (replace_list[i].standard_code == c)
181c4f02a89SMax Khon 			return (replace_list[i].vendor_code);
182c4f02a89SMax Khon 
183c4f02a89SMax Khon 	return (c);
184c4f02a89SMax Khon }
185c4f02a89SMax Khon 
186c4f02a89SMax Khon #else /* statically linked */
187c4f02a89SMax Khon 
188c90c7d69SXin LI #include <sys/types.h>
189c90c7d69SXin LI #include <sys/iconv.h>
190c90c7d69SXin LI 
191c4f02a89SMax Khon const char *
192c90c7d69SXin LI kiconv_quirkcs(const char* base __unused, int vendor __unused)
193c4f02a89SMax Khon {
194c90c7d69SXin LI 
195c4f02a89SMax Khon 	return (base);
196c4f02a89SMax Khon }
197c4f02a89SMax Khon 
198294246bbSEd Maste #endif /* PIC */
199