1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1999 by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 /* 28 * UTF-8 encoded Unicode parsing routines. For efficiency, we convert 29 * to wide chars only when absolutely needed. The following interfaces 30 * are exported to libslp: 31 * 32 * slp_utf_strchr: same semantics as strchr, but handles UTF-8 strings 33 * slp_fold_space: folds white space around and in between works; 34 * handles UTF-8 strings 35 * slp_strcasecmp: same semantics as strcasecmp, but also folds white 36 * space and attempts locale-specific 37 * case-insensitive comparisons. 38 */ 39 40 #include <stdio.h> 41 #include <string.h> 42 #include <widec.h> 43 #include <stdlib.h> 44 #include <syslog.h> 45 #include <slp-internal.h> 46 47 /* 48 * Same semantics as strchr. 49 * Assumes that we start on a char boundry, and that c is a 7-bit 50 * ASCII char. 51 */ 52 char *slp_utf_strchr(const char *s, char c) { 53 int len; 54 char *p; 55 56 for (p = (char *)s; *p; p += len) { 57 len = mblen(p, MB_CUR_MAX); 58 if (len == 1 && *p == c) 59 return (p); 60 } 61 return (NULL); 62 } 63 64 /* 65 * folds white space around and in between words. 66 * " aa bb " becomes "aa bb". 67 * returns NULL if it couldn't allocate memory. The caller must free 68 * the result when done. 69 */ 70 static char *slp_fold_space(const char *s) { 71 int len; 72 char *folded, *f; 73 74 if (!(folded = malloc(strlen(s) + 1))) { 75 slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory"); 76 return (NULL); 77 } 78 79 f = folded; 80 for (;;) { 81 /* step 1: skip white space */ 82 for (; *s; s++) { 83 len = mblen(s, MB_CUR_MAX); 84 if (len != 1) 85 break; 86 if (!isspace(*s)) 87 break; 88 } 89 90 if (!*s) { 91 /* end of string */ 92 *f = 0; 93 return (folded); 94 } 95 /* if we are in between words, keep one space */ 96 if (f != folded) 97 *f++ = ' '; 98 99 /* step 2: copy into folded until we hit more white space */ 100 while (*s) { 101 int i; 102 len = mblen(s, MB_CUR_MAX); 103 if (len == 1 && isspace(*s)) 104 break; 105 106 for (i = 0; i < len; i++) 107 *f++ = *s++; 108 } 109 *f = *s; 110 if (!*s++) 111 return (folded); 112 } 113 } 114 115 /* 116 * performs like strcasecmp, but also folds white space before comparing, 117 * and will handle UTF-8 comparisons (including case). Note that the 118 * application's locale must have been set to a UTF-8 locale for this 119 * to work properly. 120 */ 121 int slp_strcasecmp(const char *s1, const char *s2) { 122 int diff = -1; 123 char *p1, *p2; 124 size_t wcslen1, wcslen2; 125 wchar_t *wcs1, *wcs2; 126 127 p1 = p2 = NULL; wcs1 = wcs2 = NULL; 128 129 /* optimization: try simple case first */ 130 if (strcasecmp(s1, s2) == 0) 131 return (0); 132 133 /* fold white space, and try again */ 134 p1 = slp_fold_space(s1); 135 p2 = slp_fold_space(s2); 136 if (!p1 || !p2) 137 goto cleanup; 138 139 if ((diff = strcasecmp(p1, p2)) == 0) 140 goto cleanup; 141 142 /* 143 * try converting to wide char -- we must be in a locale which 144 * supports the UTF8 codeset for this to work. 145 */ 146 if ((wcslen1 = mbstowcs(NULL, p1, 0)) == (size_t)-1) 147 goto cleanup; 148 149 if (!(wcs1 = malloc(sizeof (*wcs1) * (wcslen1 + 1)))) { 150 slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); 151 goto cleanup; 152 } 153 154 if ((wcslen2 = mbstowcs(NULL, p2, 0)) == (size_t)-1) 155 goto cleanup; 156 157 if (!(wcs2 = malloc(sizeof (*wcs2) * (wcslen2 + 1)))) { 158 slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); 159 goto cleanup; 160 } 161 if (mbstowcs(wcs1, p1, wcslen1 + 1) == (size_t)-1) 162 goto cleanup; 163 if (mbstowcs(wcs2, p2, wcslen2 + 1) == (size_t)-1) 164 goto cleanup; 165 166 diff = wscasecmp(wcs1, wcs2); 167 168 cleanup: 169 if (p1) free(p1); 170 if (p2) free(p2); 171 if (wcs1) free(wcs1); 172 if (wcs2) free(wcs2); 173 return (diff); 174 } 175