1 /*
2 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * This code is derived from software contributed to Berkeley by
13 * Paul Borman at Krystal Technologies.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include "lint.h"
41 #include <wctype.h>
42 #include <locale.h>
43 #include "runefile.h"
44 #include "runetype.h"
45 #include "localeimpl.h"
46 #include "_ctype.h"
47
/*
 * Note that the standard requires iswascii to be a macro, so it is defined
 * in our headers.
 *
 * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial, and
 * iswnumber to the equivalent names without the "w" (isideogram,
 * isphonogram, isspecial, isnumber). The Solaris specific function
 * isenglish() is here, but does not get an isw* equivalent.
 *
 * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
 * only return true for characters in the portable set. While the assumption
 * is not technically correct, it turns out that for all of our locales this
 * is true. iswhexnumber is aliased to iswxdigit.
 */
61
62 static int
__istype_l(locale_t loc,wint_t c,unsigned int f)63 __istype_l(locale_t loc, wint_t c, unsigned int f)
64 {
65 unsigned int rt;
66
67 if (c < 0 || c >= _CACHED_RUNES)
68 rt = __runetype(loc->runelocale, c);
69 else
70 rt = loc->runelocale->__runetype[c];
71 return (rt & f);
72 }
73
74 static int
__istype(wint_t c,unsigned int f)75 __istype(wint_t c, unsigned int f)
76 {
77 return (__istype_l(uselocale(NULL), c, f));
78 }
79
80 int
iswctype_l(wint_t wc,wctype_t class,locale_t loc)81 iswctype_l(wint_t wc, wctype_t class, locale_t loc)
82 {
83 if (iswascii(wc))
84 return (__ctype_mask[wc] & class);
85 return (__istype_l(loc, wc, class));
86 }
87
88 #undef iswctype
89 int
iswctype(wint_t wc,wctype_t class)90 iswctype(wint_t wc, wctype_t class)
91 {
92 /*
93 * Note that we don't just call iswctype_l because we optimize for
94 * the iswascii() case, so that most of the time we have no need to
95 * call uselocale().
96 */
97 if (iswascii(wc))
98 return (__ctype_mask[wc] & class);
99 return (__istype(wc, class));
100 }
101
102 /*
103 * This is a legacy version, baked into binaries.
104 */
105 #undef _iswctype
106 unsigned
_iswctype(wchar_t wc,int class)107 _iswctype(wchar_t wc, int class)
108 {
109 if (iswascii(wc))
110 return (__ctype_mask[wc] & class);
111 return (__istype((wint_t)wc, (unsigned int)class));
112 }
113
/*
 * DEFN_ISWTYPE(type, mask) emits two functions, isw<type>_l(wc, loc) and
 * isw<type>(wc).  Each answers ASCII values from __ctype_mask and hands
 * everything else to __istype_l() or __istype() respectively, testing
 * against the supplied mask.
 */
#define	DEFN_ISWTYPE(type, mask)				\
int								\
isw##type##_l(wint_t wc, locale_t loc)				\
{								\
	if (iswascii(wc))					\
		return (__ctype_mask[wc] & (mask));		\
	return (__istype_l(loc, wc, (mask)));			\
}								\
								\
int								\
isw##type(wint_t wc)						\
{								\
	if (iswascii(wc))					\
		return (__ctype_mask[wc] & (mask));		\
	return (__istype(wc, (mask)));				\
}
130
131 /* kill off any macros */
132 #undef iswalnum
133 #undef iswalpha
134 #undef iswblank
135
136 DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
DEFN_ISWTYPE(alpha,_CTYPE_A)137 DEFN_ISWTYPE(alpha, _CTYPE_A)
138 DEFN_ISWTYPE(blank, _CTYPE_B)
139 DEFN_ISWTYPE(cntrl, _CTYPE_C)
140 DEFN_ISWTYPE(digit, _CTYPE_D)
141 DEFN_ISWTYPE(graph, _CTYPE_G)
142 DEFN_ISWTYPE(lower, _CTYPE_L)
143 DEFN_ISWTYPE(upper, _CTYPE_U)
144 DEFN_ISWTYPE(print, _CTYPE_R)
145 DEFN_ISWTYPE(punct, _CTYPE_P)
146 DEFN_ISWTYPE(space, _CTYPE_S)
147 DEFN_ISWTYPE(xdigit, _CTYPE_X)
148 DEFN_ISWTYPE(ideogram, _CTYPE_I)
149 DEFN_ISWTYPE(phonogram, _CTYPE_Q)
150 DEFN_ISWTYPE(special, _CTYPE_T)
151 DEFN_ISWTYPE(number, _CTYPE_N)
152
153
154 #undef iswhexnumber
155 #pragma weak iswhexnumber = iswxdigit
156 #pragma weak iswhexnumber_l = iswxdigit_l
157
158 #undef isideogram
159 #pragma weak isideogram = iswideogram
160
161 #undef isphonogram
162 #pragma weak isphonogram = iswphonogram
163
164 #undef isspecial
165 #pragma weak isspecial = iswspecial
166
167 #undef isnumber
168 #pragma weak isnumber = iswnumber
169
/*
 * FreeBSD has iswrune() for use by external programs, and this is used by
 * the "tr" program. As that program is part of our consolidation, we
 * provide an _ILLUMOS_PRIVATE version of this function that we can use.
 *
 * No programs that are not part of the illumos stack itself should use
 * this function -- programs that do reference it will not be portable to
 * other versions of SunOS or Solaris.
 */
int
__iswrune(wint_t wc)
{
	/*
	 * Note, FreeBSD ignored the low order byte, as they encode their
	 * ctype values differently. We can't do that (ctype is baked into
	 * applications), but instead can just check if *any* bit is set in
	 * the ctype. Any bit being set indicates it's a valid rune.
	 */

	/* NB: For ASCII all positions except NUL are runes. */
	if (wc == 0)
		return (0);
	if (iswascii(wc))
		return (1);

	return (__istype(wc, 0xffffffffU));
}
192
193 /*
194 * isenglish is a Solaris legacy. No isw* equivalent. Note that this most
195 * likely doesn't work, as the locale data we have doesn't include it. It
196 * specifically is only valid for non-ASCII characters. We're not sure this
197 * is in actual use in the wild.
198 */
199 #undef isenglish
200 int
isenglish(wint_t wc)201 isenglish(wint_t wc)
202 {
203 return (__istype(wc, _CTYPE_E));
204 }
205