1*7f399375SBaptiste Daroussin /* $NetBSD: chartype.h,v 1.37 2022/04/11 19:37:20 tnn Exp $ */ 2d0ef721eSBaptiste Daroussin 3d0ef721eSBaptiste Daroussin /*- 4d0ef721eSBaptiste Daroussin * Copyright (c) 2009 The NetBSD Foundation, Inc. 5d0ef721eSBaptiste Daroussin * All rights reserved. 6d0ef721eSBaptiste Daroussin * 7d0ef721eSBaptiste Daroussin * Redistribution and use in source and binary forms, with or without 8d0ef721eSBaptiste Daroussin * modification, are permitted provided that the following conditions 9d0ef721eSBaptiste Daroussin * are met: 10d0ef721eSBaptiste Daroussin * 1. Redistributions of source code must retain the above copyright 11d0ef721eSBaptiste Daroussin * notice, this list of conditions and the following disclaimer. 12d0ef721eSBaptiste Daroussin * 2. Redistributions in binary form must reproduce the above copyright 13d0ef721eSBaptiste Daroussin * notice, this list of conditions and the following disclaimer in the 14d0ef721eSBaptiste Daroussin * documentation and/or other materials provided with the distribution. 15d0ef721eSBaptiste Daroussin * 16d0ef721eSBaptiste Daroussin * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17d0ef721eSBaptiste Daroussin * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18d0ef721eSBaptiste Daroussin * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19d0ef721eSBaptiste Daroussin * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20d0ef721eSBaptiste Daroussin * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21d0ef721eSBaptiste Daroussin * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22d0ef721eSBaptiste Daroussin * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23d0ef721eSBaptiste Daroussin * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24d0ef721eSBaptiste Daroussin * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25d0ef721eSBaptiste Daroussin * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26d0ef721eSBaptiste Daroussin * POSSIBILITY OF SUCH DAMAGE. 27d0ef721eSBaptiste Daroussin */ 28d0ef721eSBaptiste Daroussin 29d0ef721eSBaptiste Daroussin #ifndef _h_chartype_f 30d0ef721eSBaptiste Daroussin #define _h_chartype_f 31d0ef721eSBaptiste Daroussin 32d0ef721eSBaptiste Daroussin /* Ideally we should also test the value of the define to see if it 33d0ef721eSBaptiste Daroussin * supports non-BMP code points without requiring UTF-16, but nothing 34d0ef721eSBaptiste Daroussin * seems to actually advertise this properly, despite Unicode 3.1 having 35d0ef721eSBaptiste Daroussin * been around since 2001... */ 36d0ef721eSBaptiste Daroussin #if !defined(__NetBSD__) && \ 37d0ef721eSBaptiste Daroussin !defined(__sun) && \ 38*7f399375SBaptiste Daroussin !defined(__osf__) && \ 39d0ef721eSBaptiste Daroussin !(defined(__APPLE__) && defined(__MACH__)) && \ 40d0ef721eSBaptiste Daroussin !defined(__OpenBSD__) && \ 41d0ef721eSBaptiste Daroussin !defined(__FreeBSD__) && \ 42d0ef721eSBaptiste Daroussin !defined(__DragonFly__) 43d0ef721eSBaptiste Daroussin #ifndef __STDC_ISO_10646__ 44d0ef721eSBaptiste Daroussin /* In many places it is assumed that the first 127 code points are ASCII 45d0ef721eSBaptiste Daroussin * compatible, so ensure wchar_t indeed does ISO 10646 and not some other 46d0ef721eSBaptiste Daroussin * funky encoding that could break us in weird and wonderful ways. */ 47d0ef721eSBaptiste Daroussin #error wchar_t must store ISO 10646 characters 48d0ef721eSBaptiste Daroussin #endif 49d0ef721eSBaptiste Daroussin #endif 50d0ef721eSBaptiste Daroussin 51d0ef721eSBaptiste Daroussin /* Oh for a <uchar.h> with char32_t and __STDC_UTF_32__ in it... 52d0ef721eSBaptiste Daroussin * ref: ISO/IEC DTR 19769 53d0ef721eSBaptiste Daroussin */ 54d0ef721eSBaptiste Daroussin #if WCHAR_MAX < INT32_MAX 55d0ef721eSBaptiste Daroussin #warning Build environment does not support non-BMP characters 56d0ef721eSBaptiste Daroussin #endif 57d0ef721eSBaptiste Daroussin 58d0ef721eSBaptiste Daroussin /* 59d0ef721eSBaptiste Daroussin * Conversion buffer 60d0ef721eSBaptiste Daroussin */ 61d0ef721eSBaptiste Daroussin typedef struct ct_buffer_t { 62d0ef721eSBaptiste Daroussin char *cbuff; 63d0ef721eSBaptiste Daroussin size_t csize; 64d0ef721eSBaptiste Daroussin wchar_t *wbuff; 65d0ef721eSBaptiste Daroussin size_t wsize; 66d0ef721eSBaptiste Daroussin } ct_buffer_t; 67d0ef721eSBaptiste Daroussin 68d0ef721eSBaptiste Daroussin /* Encode a wide-character string and return the UTF-8 encoded result. */ 69d0ef721eSBaptiste Daroussin char *ct_encode_string(const wchar_t *, ct_buffer_t *); 70d0ef721eSBaptiste Daroussin 71d0ef721eSBaptiste Daroussin /* Decode a (multi)?byte string and return the wide-character string result. */ 72d0ef721eSBaptiste Daroussin wchar_t *ct_decode_string(const char *, ct_buffer_t *); 73d0ef721eSBaptiste Daroussin 74d0ef721eSBaptiste Daroussin /* Decode a (multi)?byte argv string array. 75d0ef721eSBaptiste Daroussin * The pointer returned must be free()d when done. */ 76d0ef721eSBaptiste Daroussin libedit_private wchar_t **ct_decode_argv(int, const char *[], ct_buffer_t *); 77d0ef721eSBaptiste Daroussin 78d0ef721eSBaptiste Daroussin /* Encode a character into the destination buffer, provided there is sufficient 79d0ef721eSBaptiste Daroussin * buffer space available. Returns the number of bytes used up (zero if the 80d0ef721eSBaptiste Daroussin * character cannot be encoded, -1 if there was not enough space available). */ 81d0ef721eSBaptiste Daroussin libedit_private ssize_t ct_encode_char(char *, size_t, wchar_t); 82d0ef721eSBaptiste Daroussin libedit_private size_t ct_enc_width(wchar_t); 83d0ef721eSBaptiste Daroussin 84d0ef721eSBaptiste Daroussin /* The maximum buffer size to hold the most unwieldy visual representation, 85d0ef721eSBaptiste Daroussin * in this case \U+nnnnn. */ 86d0ef721eSBaptiste Daroussin #define VISUAL_WIDTH_MAX ((size_t)8) 87d0ef721eSBaptiste Daroussin 88d0ef721eSBaptiste Daroussin /* The terminal is thought of in terms of X columns by Y lines. In the cases 89d0ef721eSBaptiste Daroussin * where a wide character takes up more than one column, the adjacent 90d0ef721eSBaptiste Daroussin * occupied column entries will contain this faux character. */ 91c0f37bf6SDimitry Andric #define MB_FILL_CHAR ((wint_t)-1) 92d0ef721eSBaptiste Daroussin 93d0ef721eSBaptiste Daroussin /* Visual width of character c, taking into account ^? , \0177 and \U+nnnnn 94d0ef721eSBaptiste Daroussin * style visual expansions. */ 95d0ef721eSBaptiste Daroussin libedit_private int ct_visual_width(wchar_t); 96d0ef721eSBaptiste Daroussin 97d0ef721eSBaptiste Daroussin /* Turn the given character into the appropriate visual format, matching 98d0ef721eSBaptiste Daroussin * the width given by ct_visual_width(). Returns the number of characters used 99d0ef721eSBaptiste Daroussin * up, or -1 if insufficient space. Buffer length is in count of wchar_t's. */ 100d0ef721eSBaptiste Daroussin libedit_private ssize_t ct_visual_char(wchar_t *, size_t, wchar_t); 101d0ef721eSBaptiste Daroussin 102d0ef721eSBaptiste Daroussin /* Convert the given string into visual format, using the ct_visual_char() 103d0ef721eSBaptiste Daroussin * function. Uses a static buffer, so not threadsafe. */ 104d0ef721eSBaptiste Daroussin libedit_private const wchar_t *ct_visual_string(const wchar_t *, ct_buffer_t *); 105d0ef721eSBaptiste Daroussin 106d0ef721eSBaptiste Daroussin 107d0ef721eSBaptiste Daroussin /* printable character, use ct_visual_width() to find out display width */ 108d0ef721eSBaptiste Daroussin #define CHTYPE_PRINT ( 0) 109d0ef721eSBaptiste Daroussin /* control character found inside the ASCII portion of the charset */ 110d0ef721eSBaptiste Daroussin #define CHTYPE_ASCIICTL (-1) 111d0ef721eSBaptiste Daroussin /* a \t */ 112d0ef721eSBaptiste Daroussin #define CHTYPE_TAB (-2) 113d0ef721eSBaptiste Daroussin /* a \n */ 114d0ef721eSBaptiste Daroussin #define CHTYPE_NL (-3) 115d0ef721eSBaptiste Daroussin /* non-printable character */ 116d0ef721eSBaptiste Daroussin #define CHTYPE_NONPRINT (-4) 117d0ef721eSBaptiste Daroussin /* classification of character c, as one of the above defines */ 118d0ef721eSBaptiste Daroussin libedit_private int ct_chr_class(wchar_t c); 119d0ef721eSBaptiste Daroussin 120d0ef721eSBaptiste Daroussin #endif /* _chartype_f */ 121