/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

/*
 * Common definitions shared by the kernel iconv converters between UTF-8
 * and the GB18030/BIG5/EUC-TW/UHC... ("CCK") encodings: byte-range
 * predicates, helper macros used inside the conversion loops, lookup table
 * entry types and the prototypes of the shared conversion wrappers.
 *
 * This header includes nothing itself; the includer must already have the
 * fixed-width integer types, size_t and uchar_t in scope.
 */

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef	_KERNEL

/* The start value of leading byte of EUC encoding. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Valid EUC range or not: KICONV_EUC_START (0xA1) - 0xFE.
 * NOTE: (v) is evaluated twice; do not pass an expression with side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	\
	((v) >= KICONV_EUC_START && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character for non-identicals and its length.
 * The CHAR1..CHAR3 macros are the individual bytes; KICONV_UTF8_REPLACEMENT_CHAR
 * is the same three bytes packed into a single integer value.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of 3 or 4 bytes UTF-8 character is invalid or not.
 * The first byte indexes the u8_valid_min_2nd_byte[] and
 * u8_valid_max_2nd_byte[] tables declared at the end of this header; the
 * second byte is invalid when it falls outside that [min, max] range.
 * NOTE: both arguments are evaluated twice.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	((second) < u8_valid_min_2nd_byte[(first)] ||			\
	    (second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * The macro reads a variable named `kcd' from the caller's scope and casts
 * it to kiconv_state_t. bom_processed is set to 1 unconditionally, whether
 * or not a BOM was found (including when fewer than 3 input bytes remain).
 *
 * The expansion is wrapped in do { } while (0) so that it is exactly one
 * statement: used as the body of an unbraced if/else or loop, both the
 * check and the bom_processed update stay under the caller's control
 * statement. Invoke it with a trailing semicolon.
 *
 * NOTE(review): the byte comparisons against 0xef/0xbb/0xbf assume `ib'
 * points to unsigned bytes (e.g. uchar_t *) -- confirm against callers.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)

/*
 * Check BOM of UTF-8 without state information: if at least 3 bytes remain
 * and they are 0xef 0xbb 0xbf, advance `ib' past them.
 *
 * Wrapped in do { } while (0) so that an `else' following the invocation
 * cannot bind to the macro's internal `if', and so that the caller's
 * trailing semicolon terminates the statement instead of adding an empty
 * one. Invoke it with a trailing semicolon.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	do {								\
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
	} while (0)

/*
 * Set errno and break.
 *
 * Stores `err' through the caller's `errno' pointer, sets the caller's
 * `ret_val' to (size_t)-1 and then executes `break'.
 *
 * This macro expands to several statements and deliberately is NOT wrapped
 * in do { } while (0): the `break' has to leave the caller's enclosing
 * loop/switch, and a wrapper loop would capture it. Consequently it must
 * only be used inside a braced block, never as the sole body of an
 * unbraced if/else.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	*errno = (err);							\
	ret_val = (size_t)-1;						\
	break

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * When the caller's `flag' has KICONV_REPLACE_INVALID set, `ib' is advanced
 * by `advance' and control jumps to the caller's REPLACE_INVALID label;
 * otherwise this behaves as KICONV_SET_ERRNO_AND_BREAK(err).
 *
 * The same restriction applies as for KICONV_SET_ERRNO_AND_BREAK: the
 * expansion is multiple statements ending in `break', so use it only
 * inside a braced block.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	}								\
	KICONV_SET_ERRNO_AND_BREAK((err))

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> utf8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void *kiconv_open_to_cck(void);
int kiconv_close_to_cck(void *);

/* Binary search function. */
size_t kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_KICONV_CCK_COMMON_H */