1 /* 2 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * This code is derived from software contributed to Berkeley by 13 * Paul Borman at Krystal Technologies. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #include "lint.h" 41 #include <wctype.h> 42 #include <locale.h> 43 #include "runefile.h" 44 #include "runetype.h" 45 #include "localeimpl.h" 46 #include "_ctype.h" 47 48 /* 49 * Note that the standard requires iswascii to be a macro, so it is defined 50 * in our headers. 51 * 52 * We aliased (per Solaris) iswideogram, iswspecial, iswspecial to the 53 * equivalent values without "w". The Solaris specific function isenglish() 54 * is here, but does not get an isw* equivalent. 55 * 56 * Note that various code assumes that "numbers" (iswdigit, iswxdigit) 57 * only return true for characters in the portable set. While the assumption 58 * is not technically correct, it turns out that for all of our locales this 59 * is true. iswhexnumber is aliased to iswxdigit. 60 */ 61 62 static int 63 __istype_l(locale_t loc, wint_t c, unsigned int f) 64 { 65 unsigned int rt; 66 67 if (c < 0 || c >= _CACHED_RUNES) 68 rt = __runetype(loc->runelocale, c); 69 else 70 rt = loc->runelocale->__runetype[c]; 71 return (rt & f); 72 } 73 74 static int 75 __istype(wint_t c, unsigned int f) 76 { 77 return (__istype_l(uselocale(NULL), c, f)); 78 } 79 80 int 81 iswctype_l(wint_t wc, wctype_t class, locale_t loc) 82 { 83 if (iswascii(wc)) 84 return (__ctype_mask[wc] & class); 85 return (__istype_l(loc, wc, class)); 86 } 87 88 #undef iswctype 89 int 90 iswctype(wint_t wc, wctype_t class) 91 { 92 /* 93 * Note that we don't just call iswctype_l because we optimize for 94 * the iswascii() case, so that most of the time we have no need to 95 * call uselocale(). 96 */ 97 if (iswascii(wc)) 98 return (__ctype_mask[wc] & class); 99 return (__istype(wc, class)); 100 } 101 102 /* 103 * This is a legacy version, baked into binaries. 104 */ 105 #undef _iswctype 106 unsigned 107 _iswctype(wchar_t wc, int class) 108 { 109 if (iswascii(wc)) 110 return (__ctype_mask[wc] & class); 111 return (__istype((wint_t)wc, (unsigned int)class)); 112 } 113 114 #define DEFN_ISWTYPE(type, mask) \ 115 int \ 116 isw##type##_l(wint_t wc, locale_t loc) \ 117 { \ 118 return (iswascii(wc) ? \ 119 (__ctype_mask[wc] & (mask)) : \ 120 __istype_l(loc, wc, mask)); \ 121 } \ 122 \ 123 int \ 124 isw##type(wint_t wc) \ 125 { \ 126 return (iswascii(wc) ? \ 127 (__ctype_mask[wc] & (mask)) : \ 128 __istype(wc, mask)); \ 129 } 130 131 /* kill off any macros */ 132 #undef iswalnum 133 #undef iswalpha 134 #undef iswblank 135 136 DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D) 137 DEFN_ISWTYPE(alpha, _CTYPE_A) 138 DEFN_ISWTYPE(blank, _CTYPE_B) 139 DEFN_ISWTYPE(cntrl, _CTYPE_C) 140 DEFN_ISWTYPE(digit, _CTYPE_D) 141 DEFN_ISWTYPE(graph, _CTYPE_G) 142 DEFN_ISWTYPE(lower, _CTYPE_L) 143 DEFN_ISWTYPE(upper, _CTYPE_U) 144 DEFN_ISWTYPE(print, _CTYPE_R) 145 DEFN_ISWTYPE(punct, _CTYPE_P) 146 DEFN_ISWTYPE(space, _CTYPE_S) 147 DEFN_ISWTYPE(xdigit, _CTYPE_X) 148 DEFN_ISWTYPE(ideogram, _CTYPE_I) 149 DEFN_ISWTYPE(phonogram, _CTYPE_Q) 150 DEFN_ISWTYPE(special, _CTYPE_T) 151 DEFN_ISWTYPE(number, _CTYPE_N) 152 153 154 #undef iswhexnumber 155 #pragma weak iswhexnumber = iswxdigit 156 #pragma weak iswhexnumber_l = iswxdigit_l 157 158 #undef isideogram 159 #pragma weak isideogram = iswideogram 160 161 #undef isphonogram 162 #pragma weak isphonogram = iswphonogram 163 164 #undef isspecial 165 #pragma weak isspecial = iswspecial 166 167 #undef isnumber 168 #pragma weak isnumber = iswnumber 169 170 /* 171 * FreeBSD has iswrune() for use by external programs, and this is used by 172 * the "tr" program. As that program is part of our consolidation, we 173 * provide an _ILLUMOS_PRIVATE version of this function that we can use. 174 * 175 * No programs that are not part of the illumos stack itself should use 176 * this function -- programs that do reference will not be portable to 177 * other versions of SunOS or Solaris. 178 */ 179 int 180 __iswrune(wint_t wc) 181 { 182 /* 183 * Note, FreeBSD ignored the low order byte, as they encode their 184 * ctype values differently. We can't do that (ctype is baked into 185 * applications), but instead can just check if *any* bit is set in 186 * the ctype. Any bit being set indicates its a valid rune. 187 * 188 * NB: For ASCII all positions except NULL are runes. 189 */ 190 return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU)); 191 } 192 193 /* 194 * isenglish is a Solaris legacy. No isw* equivalent. Note that this most 195 * likely doesn't work, as the locale data we have doesn't include it. It 196 * specifically is only valid for non-ASCII characters. We're not sure this 197 * is in actual use in the wild. 198 */ 199 #undef isenglish 200 int 201 isenglish(wint_t wc) 202 { 203 return (__istype(wc, _CTYPE_E)); 204 } 205