14297a3b0SGarrett D'Amore /* 2*2d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org> 36b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systmes, Inc. All rights reserved. 44297a3b0SGarrett D'Amore * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 54297a3b0SGarrett D'Amore * at Electronni Visti IA, Kiev, Ukraine. 64297a3b0SGarrett D'Amore * All rights reserved. 74297a3b0SGarrett D'Amore * 84297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 94297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 104297a3b0SGarrett D'Amore * are met: 114297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 124297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 134297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 144297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 154297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 164297a3b0SGarrett D'Amore * 174297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 184297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 194297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 204297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 214297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 224297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 234297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 244297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 254297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 264297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 274297a3b0SGarrett D'Amore * SUCH DAMAGE. 284297a3b0SGarrett D'Amore */ 294297a3b0SGarrett D'Amore 304297a3b0SGarrett D'Amore #ifndef _COLLATE_H_ 314297a3b0SGarrett D'Amore #define _COLLATE_H_ 324297a3b0SGarrett D'Amore 334297a3b0SGarrett D'Amore #include <sys/types.h> 344297a3b0SGarrett D'Amore #include <limits.h> 35*2d08521bSGarrett D'Amore #include <locale.h> 36*2d08521bSGarrett D'Amore #include "localeimpl.h" 374297a3b0SGarrett D'Amore 386b5e5868SGarrett D'Amore #define COLLATE_STR_LEN 24 /* should be 64-bit multiple */ 39723fee08SGarrett D'Amore #define COLLATE_VERSION "IllumosCollate2\n" 404297a3b0SGarrett D'Amore 416b5e5868SGarrett D'Amore #define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ 426b5e5868SGarrett D'Amore #define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ 434297a3b0SGarrett D'Amore 446b5e5868SGarrett D'Amore #define DIRECTIVE_UNDEF 0x00 456b5e5868SGarrett D'Amore #define DIRECTIVE_FORWARD 0x01 466b5e5868SGarrett D'Amore #define DIRECTIVE_BACKWARD 0x02 476b5e5868SGarrett D'Amore #define DIRECTIVE_POSITION 0x04 486b5e5868SGarrett D'Amore #define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */ 494297a3b0SGarrett D'Amore 506b5e5868SGarrett D'Amore #define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD) 514297a3b0SGarrett D'Amore 526b5e5868SGarrett D'Amore /* 536b5e5868SGarrett D'Amore * The collate file format is as follows: 546b5e5868SGarrett D'Amore * 556b5e5868SGarrett D'Amore * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION 566b5e5868SGarrett D'Amore * collate_info_t info; // see below, includes padding 576b5e5868SGarrett D'Amore * collate_char_pri_t char_data[256]; // 8 bit char values 586b5e5868SGarrett D'Amore * collate_subst_t subst[*]; // 0 or more substitutions 596b5e5868SGarrett D'Amore * collate_chain_pri_t chains[*]; // 0 or more chains 606b5e5868SGarrett D'Amore * collate_large_pri_t large[*]; // extended char priorities 616b5e5868SGarrett D'Amore * 626b5e5868SGarrett D'Amore * Note that all structures must be 32-bit aligned, as each structure 636b5e5868SGarrett D'Amore * contains 32-bit member fields. The entire file is mmap'd, so its 646b5e5868SGarrett D'Amore * critical that alignment be observed. It is not generally safe to 656b5e5868SGarrett D'Amore * use any 64-bit values in the structures. 666b5e5868SGarrett D'Amore */ 676b5e5868SGarrett D'Amore 686b5e5868SGarrett D'Amore typedef struct collate_info { 696b5e5868SGarrett D'Amore uint8_t directive_count; 706b5e5868SGarrett D'Amore uint8_t directive[COLL_WEIGHTS_MAX]; 71723fee08SGarrett D'Amore int32_t pri_count[COLL_WEIGHTS_MAX]; 726b5e5868SGarrett D'Amore int32_t flags; 736b5e5868SGarrett D'Amore int32_t chain_count; 74723fee08SGarrett D'Amore int32_t large_count; 756b5e5868SGarrett D'Amore int32_t subst_count[COLL_WEIGHTS_MAX]; 766b5e5868SGarrett D'Amore int32_t undef_pri[COLL_WEIGHTS_MAX]; 776b5e5868SGarrett D'Amore } collate_info_t; 786b5e5868SGarrett D'Amore 79723fee08SGarrett D'Amore typedef struct collate_char { 806b5e5868SGarrett D'Amore int32_t pri[COLL_WEIGHTS_MAX]; 81723fee08SGarrett D'Amore } collate_char_t; 826b5e5868SGarrett D'Amore 83723fee08SGarrett D'Amore typedef struct collate_chain { 846b5e5868SGarrett D'Amore wchar_t str[COLLATE_STR_LEN]; 856b5e5868SGarrett D'Amore int32_t pri[COLL_WEIGHTS_MAX]; 86723fee08SGarrett D'Amore } collate_chain_t; 876b5e5868SGarrett D'Amore 88723fee08SGarrett D'Amore typedef struct collate_large { 896b5e5868SGarrett D'Amore int32_t val; 90723fee08SGarrett D'Amore collate_char_t pri; 91723fee08SGarrett D'Amore } collate_large_t; 926b5e5868SGarrett D'Amore 936b5e5868SGarrett D'Amore typedef struct collate_subst { 946b5e5868SGarrett D'Amore int32_t key; 956b5e5868SGarrett D'Amore int32_t pri[COLLATE_STR_LEN]; 966b5e5868SGarrett D'Amore } collate_subst_t; 976b5e5868SGarrett D'Amore 98*2d08521bSGarrett D'Amore struct lc_collate { 99*2d08521bSGarrett D'Amore int lc_is_posix; 1006b5e5868SGarrett D'Amore 101*2d08521bSGarrett D'Amore uint8_t lc_directive_count; 102*2d08521bSGarrett D'Amore uint8_t lc_directive[COLL_WEIGHTS_MAX]; 103*2d08521bSGarrett D'Amore int32_t lc_pri_count[COLL_WEIGHTS_MAX]; 104*2d08521bSGarrett D'Amore int32_t lc_flags; 105*2d08521bSGarrett D'Amore int32_t lc_chain_count; 106*2d08521bSGarrett D'Amore int32_t lc_large_count; 107*2d08521bSGarrett D'Amore int32_t lc_subst_count[COLL_WEIGHTS_MAX]; 108*2d08521bSGarrett D'Amore int32_t lc_undef_pri[COLL_WEIGHTS_MAX]; 109*2d08521bSGarrett D'Amore 110*2d08521bSGarrett D'Amore collate_info_t *lc_info; 111*2d08521bSGarrett D'Amore collate_char_t *lc_char_table; 112*2d08521bSGarrett D'Amore collate_large_t *lc_large_table; 113*2d08521bSGarrett D'Amore collate_chain_t *lc_chain_table; 114*2d08521bSGarrett D'Amore collate_subst_t *lc_subst_table[COLL_WEIGHTS_MAX]; 115*2d08521bSGarrett D'Amore }; 116*2d08521bSGarrett D'Amore 117*2d08521bSGarrett D'Amore void _collate_lookup(const struct lc_collate *, const wchar_t *, 118*2d08521bSGarrett D'Amore int *, int *, int, const int **); 119*2d08521bSGarrett D'Amore size_t _collate_wxfrm(const struct lc_collate *, const wchar_t *, 120*2d08521bSGarrett D'Amore wchar_t *, size_t); 121*2d08521bSGarrett D'Amore size_t _collate_sxfrm(const wchar_t *, char *, size_t, locale_t); 122*2d08521bSGarrett D'Amore int _collate_range_cmp(wchar_t, wchar_t, locale_t); 1234297a3b0SGarrett D'Amore 1244297a3b0SGarrett D'Amore #endif /* !_COLLATE_H_ */ 125