1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 1999 by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27 /*
28 * UTF-8 encoded Unicode parsing routines. For efficiency, we convert
29 * to wide chars only when absolutely needed. The following interfaces
30 * are exported to libslp:
31 *
32 * slp_utf_strchr: same semantics as strchr, but handles UTF-8 strings
33 * slp_fold_space: folds white space around and in between words;
34 * handles UTF-8 strings
35 * slp_strcasecmp: same semantics as strcasecmp, but also folds white
36 * space and attempts locale-specific
37 * case-insensitive comparisons.
38 */
39
40 #include <stdio.h>
41 #include <string.h>
42 #include <widec.h>
43 #include <stdlib.h>
44 #include <syslog.h>
45 #include <slp-internal.h>
46
/*
 * Locate the first occurrence of the 7-bit ASCII character c in the
 * multibyte (UTF-8) string s.
 *
 * The string is walked one multibyte character at a time with mblen(),
 * so only single-byte characters are ever compared against c; a byte
 * inside a longer multibyte sequence is never mistaken for a match.
 * s must be non-NULL and must start on a character boundary.
 *
 * Returns a pointer to the matching byte inside s, or NULL if c does not
 * occur. Unlike strchr(), searching for '\0' returns NULL, because the
 * scan stops at the terminator without testing it.
 */
char *slp_utf_strchr(const char *s, char c) {
	char *p;
	int len;

	for (p = (char *)s; *p != '\0'; p += len) {
		len = mblen(p, MB_CUR_MAX);
		if (len < 1) {
			/*
			 * mblen() reports -1 for an invalid or truncated
			 * sequence. Advancing by that value would walk
			 * backwards out of the string, so reset the
			 * conversion state and step over the offending
			 * byte instead. Such a byte is never a match.
			 */
			(void) mblen(NULL, 0);
			len = 1;
			continue;
		}
		if (len == 1 && *p == c)
			return (p);
	}
	return (NULL);
}
63
/*
 * Folds white space around and in between words:
 * "  aa   bb  " becomes "aa bb".
 *
 * Leading and trailing white space is dropped and every interior run of
 * white space is replaced by a single ' '. Only single-byte characters
 * (as reported by mblen()) are considered as white space; multibyte
 * characters are copied through unchanged. Bytes that mblen() rejects as
 * invalid are copied through one at a time.
 *
 * s must be non-NULL. Returns a newly allocated string which the caller
 * must free, or NULL (after logging through slp_err) if memory could not
 * be allocated.
 */
static char *slp_fold_space(const char *s) {
	char *folded, *f;
	int len, i;

	/* the folded string can never be longer than the input */
	if ((folded = malloc(strlen(s) + 1)) == NULL) {
		slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory");
		return (NULL);
	}

	f = folded;
	for (;;) {
		/* step 1: skip white space */
		while (*s != '\0') {
			len = mblen(s, MB_CUR_MAX);
			/* cast: isspace() is undefined for negative values */
			if (len != 1 || !isspace((unsigned char)*s))
				break;
			s++;
		}

		if (*s == '\0') {
			/* end of string; this also drops trailing space */
			*f = '\0';
			return (folded);
		}

		/* if we are in between words, keep one space */
		if (f != folded)
			*f++ = ' ';

		/* step 2: copy into folded until we hit more white space */
		while (*s != '\0') {
			len = mblen(s, MB_CUR_MAX);
			if (len < 1) {
				/*
				 * Invalid or truncated sequence: mblen()
				 * returned -1. Copy the byte as-is so that
				 * we always make progress, and reset the
				 * conversion state for the next character.
				 */
				(void) mblen(NULL, 0);
				len = 1;
			} else if (len == 1 && isspace((unsigned char)*s)) {
				break;
			}

			/* never copy past the terminator */
			for (i = 0; i < len && *s != '\0'; i++)
				*f++ = *s++;
		}
		/*
		 * *s is now either white space or the terminator; step 1
		 * of the next iteration handles both cases.
		 */
	}
}
114
/*
 * Case-insensitive string comparison that also ignores differences in
 * white space, and that can compare UTF-8 text (including case) when the
 * application's locale has been set to a UTF-8 locale.
 *
 * The comparison is attempted in three increasingly expensive stages:
 *   1. plain strcasecmp() on the raw arguments;
 *   2. strcasecmp() on white-space-folded copies;
 *   3. wscasecmp() on wide-character conversions of the folded copies.
 *
 * Returns 0 when the strings match at any stage. Otherwise the result
 * is non-zero: -1 if the folded copies could not be created, the stage 2
 * strcasecmp() result if the wide-character conversion or its allocation
 * failed, and the wscasecmp() result in all other cases.
 */
int slp_strcasecmp(const char *s1, const char *s2) {
	char *folded1 = NULL;
	char *folded2 = NULL;
	wchar_t *wide1 = NULL;
	wchar_t *wide2 = NULL;
	size_t nwide1, nwide2;
	int result = -1;

	/* stage 1: cheap check on the strings exactly as given */
	if (strcasecmp(s1, s2) == 0)
		return (0);

	/* stage 2: normalize the white space in both, then compare again */
	folded1 = slp_fold_space(s1);
	folded2 = slp_fold_space(s2);
	if (folded1 == NULL || folded2 == NULL)
		goto done;

	result = strcasecmp(folded1, folded2);
	if (result == 0)
		goto done;

	/*
	 * stage 3: compare as wide characters. The conversions only
	 * succeed in a locale whose codeset matches the input (UTF-8);
	 * if any step fails we fall back to the stage 2 result.
	 */
	nwide1 = mbstowcs(NULL, folded1, 0);
	if (nwide1 == (size_t)-1)
		goto done;

	wide1 = malloc(sizeof (*wide1) * (nwide1 + 1));
	if (wide1 == NULL) {
		slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory");
		goto done;
	}

	nwide2 = mbstowcs(NULL, folded2, 0);
	if (nwide2 == (size_t)-1)
		goto done;

	wide2 = malloc(sizeof (*wide2) * (nwide2 + 1));
	if (wide2 == NULL) {
		slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory");
		goto done;
	}

	if (mbstowcs(wide1, folded1, nwide1 + 1) == (size_t)-1)
		goto done;
	if (mbstowcs(wide2, folded2, nwide2 + 1) == (size_t)-1)
		goto done;

	result = wscasecmp(wide1, wide2);

done:
	/* free(NULL) is a no-op, so no guards are needed here */
	free(folded1);
	free(folded2);
	free(wide1);
	free(wide2);
	return (result);
}
175