/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef	_KERNEL

/* The start value of leading byte of EUC encoding. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Valid EUC range or not.
 * NOTE: `v' is evaluated twice; pass an expression without side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	((v) >= 0xA1 && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character for non-identicals and its length.
 * The three single-byte constants are the individual bytes of the packed
 * value KICONV_UTF8_REPLACEMENT_CHAR (0xEF 0xBF 0xBD).
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of 3 or 4 bytes UTF-8 character is invalid or not.
 *
 * `first' is used as an index into u8_valid_min_2nd_byte[] and
 * u8_valid_max_2nd_byte[] (declared at the bottom of this header).
 * NOTE: both arguments are evaluated twice; pass expressions without
 * side effects.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	((second) < u8_valid_min_2nd_byte[(first)] ||			\
	(second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * The macro reads and updates the conversion state through a variable
 * named `kcd' that must be in scope at the point of use; `bom_processed'
 * is set to 1 unconditionally, whether or not a BOM was found.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement: without the wrapper, using the macro as the body of an
 * unbraced `if' would leave the bom_processed assignment outside of the
 * condition.  Invoke it as a statement terminated by `;'.
 * NOTE: `ib' is evaluated several times and is modified; it must be a
 * plain lvalue.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and cannot capture an `else' that follows the invocation.  Invoke it as
 * a statement terminated by `;'.
 * NOTE: `ib' is evaluated several times and is modified; it must be a
 * plain lvalue.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	do {								\
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
	} while (0)

/*
 * Set errno and break.
 *
 * Stores `err' through the pointer named `errno', sets the variable named
 * `ret_val' to (size_t)-1 and executes `break'; both names must be in
 * scope at the point of use.
 *
 * This macro is intentionally NOT wrapped in do { } while (0): the `break'
 * has to leave the caller's enclosing loop or switch, not a wrapper loop.
 * It therefore expands to several statements and must only be used inside
 * a braced block, never as the body of an unbraced if/else/for/while.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	*errno = (err);							\
	ret_val = (size_t)-1;						\
	break

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * If KICONV_REPLACE_INVALID is set in the variable named `flag', `ib' is
 * advanced by `advance' bytes and control jumps to the label
 * REPLACE_INVALID; otherwise KICONV_SET_ERRNO_AND_BREAK(err) is executed.
 * `flag', `ib', the REPLACE_INVALID label and everything required by
 * KICONV_SET_ERRNO_AND_BREAK must exist at the point of use.
 *
 * Like KICONV_SET_ERRNO_AND_BREAK, this expands to several statements
 * ending in `break' and must only be used inside a braced block.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	}								\
	KICONV_SET_ERRNO_AND_BREAK((err))

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> utf8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *);

/* Binary search function. */
size_t	kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t	kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t	kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_KICONV_CCK_COMMON_H */