/*-
 * Copyright (c) 2003 Ryuichiro Imura
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * kiconv(3) requires shared linking, so the quirk tables are only built
 * for the shared (PIC) object.  The statically linked variant gets a
 * trivial stub instead, which reduces the module size.
 */

#ifdef PIC

/*
 * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
 *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem. That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
 *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which can not be
 * handled by this xlat16 converter.
 */

#include <sys/types.h>
#include <sys/iconv.h>

#include <stdio.h>
#include <string.h>
#include <strings.h>

#include "quirks.h"

/*
 * List of all quirk character sets: maps a (vendor, base codeset) pair
 * to the name of the vendor-specific quirk codeset.
 */
static const struct {
	int vendor; /* reserved for non MS mapping */
	const char *base_codeset, *quirk_codeset;
} quirk_list[] = {
	{ KICONV_VENDOR_MICSFT,	"eucJP",	"eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"EUC-JP",	"eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"SJIS",		"SJIS-ms"  },
	{ KICONV_VENDOR_MICSFT,	"Shift_JIS",	"SJIS-ms"  },
	{ KICONV_VENDOR_MICSFT,	"Big5",		"Big5-ms"  }
};

/*
 * The character list to replace for Japanese MS-Windows.
 *
 * Not const-qualified because search_quirk() hands it out through a
 * non-const pointer.
 */
static struct quirk_replace_list quirk_jis_cp932[] = {
	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
	{ 0x203e, 0x007e }, /* Overline, Tilde */
	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
};

/*
 * All entries of quirks.
 *
 * replace_list is a plain pointer to the first element (or NULL when the
 * quirk codeset needs no character replacement), so it can be copied out
 * without ever dereferencing a null pointer.
 */
#define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))
static const struct {
	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
	struct quirk_replace_list *replace_list;
	size_t num_of_replaces;
} quirk_table[] = {
	{
		"eucJP-ms", "eucJP", ENCODING_UNICODE,
		quirk_jis_cp932,
		NumOf(quirk_jis_cp932)
	},
	{
		"SJIS-ms", "CP932", ENCODING_UNICODE,
		/* XXX - quirk_replace_list should be NULL */
		quirk_jis_cp932,
		NumOf(quirk_jis_cp932)
	},
	{
		"Big5-ms", "CP950", ENCODING_UNICODE,
		NULL, 0
	}
};

/*
 * Return the name of the quirk codeset registered for the given base
 * codeset and vendor, or `base' itself when no quirk is registered.
 */
const char *
kiconv_quirkcs(const char* base, int vendor)
{
	size_t i;

	/*
	 * We should compare codeset names ignoring case here,
	 * so that quirk could be used for all of the user input
	 * patterns.
	 */
	for (i = 0; i < NumOf(quirk_list); i++) {
		if (quirk_list[i].vendor == vendor &&
		    strcasecmp(quirk_list[i].base_codeset, base) == 0)
			return (quirk_list[i].quirk_codeset);
	}

	return (base);
}

/*
 * Internal Functions
 */

/*
 * Look up `given_codeset' in the quirk table (exact, case-sensitive
 * match).
 *
 * *replace_list and *num_of_replaces are always reset to NULL and 0
 * first.  When the quirk entry is found and its pair codeset equals
 * `pair_codeset', they are set to the entry's replacement table and its
 * length.
 *
 * Returns the codeset name to pass to iconv for a known quirk codeset,
 * or `given_codeset' unchanged when it is not a quirk codeset.
 */
const char *
search_quirk(const char *given_codeset,
	     const char *pair_codeset,
	     struct quirk_replace_list **replace_list,
	     size_t *num_of_replaces)
{
	size_t i;

	*replace_list = NULL;
	*num_of_replaces = 0;
	for (i = 0; i < NumOf(quirk_table); i++) {
		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) != 0)
			continue;
		if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
			*replace_list = quirk_table[i].replace_list;
			*num_of_replaces = quirk_table[i].num_of_replaces;
		}
		return (quirk_table[i].iconv_codeset);
	}

	return (given_codeset);
}

/*
 * Translate a vendor code to its standard code using the first `num'
 * entries of `replace_list'.  Codes without an entry are returned
 * unchanged.
 */
uint16_t
quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].vendor_code == c)
			return (replace_list[i].standard_code);
	}

	return (c);
}

/*
 * Translate a standard code to its vendor code using the first `num'
 * entries of `replace_list'.  Codes without an entry are returned
 * unchanged.
 */
uint16_t
quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].standard_code == c)
			return (replace_list[i].vendor_code);
	}

	return (c);
}

#else /* statically linked */

/*
 * Stub for the statically linked build: no quirk tables are compiled in,
 * so the base codeset is always returned as is.
 */
const char *
kiconv_quirkcs(const char* base, int vendor)
{
	(void)vendor;	/* unused without the quirk tables */

	return (base);
}

#endif /* PIC */