1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1999 by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * UTF-8 encoded Unicode parsing routines. For efficiency, we convert 31 * to wide chars only when absolutely needed. The following interfaces 32 * are exported to libslp: 33 * 34 * slp_utf_strchr: same semantics as strchr, but handles UTF-8 strings 35 * slp_fold_space: folds white space around and in between words; 36 * handles UTF-8 strings 37 * slp_strcasecmp: same semantics as strcasecmp, but also folds white 38 * space and attempts locale-specific 39 * case-insensitive comparisons. 40 */ 41 42 #include <stdio.h> 43 #include <string.h> 44 #include <widec.h> 45 #include <stdlib.h> 46 #include <syslog.h> 47 #include <slp-internal.h> 48 49 /* 50 * Same semantics as strchr. 51 * Assumes that we start on a char boundary, and that c is a 7-bit 52 * ASCII char. 
53 */ 54 char *slp_utf_strchr(const char *s, char c) { 55 int len; 56 char *p; 57 58 for (p = (char *)s; *p; p += len) { 59 len = mblen(p, MB_CUR_MAX); 60 if (len == 1 && *p == c) 61 return (p); 62 } 63 return (NULL); 64 } 65 66 /* 67 * folds white space around and in between words. 68 * " aa bb " becomes "aa bb". 69 * returns NULL if it couldn't allocate memory. The caller must free 70 * the result when done. 71 */ 72 static char *slp_fold_space(const char *s) { 73 int len; 74 char *folded, *f; 75 76 if (!(folded = malloc(strlen(s) + 1))) { 77 slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory"); 78 return (NULL); 79 } 80 81 f = folded; 82 for (;;) { 83 /* step 1: skip white space */ 84 for (; *s; s++) { 85 len = mblen(s, MB_CUR_MAX); 86 if (len != 1) 87 break; 88 if (!isspace(*s)) 89 break; 90 } 91 92 if (!*s) { 93 /* end of string */ 94 *f = 0; 95 return (folded); 96 } 97 /* if we are in between words, keep one space */ 98 if (f != folded) 99 *f++ = ' '; 100 101 /* step 2: copy into folded until we hit more white space */ 102 while (*s) { 103 int i; 104 len = mblen(s, MB_CUR_MAX); 105 if (len == 1 && isspace(*s)) 106 break; 107 108 for (i = 0; i < len; i++) 109 *f++ = *s++; 110 } 111 *f = *s; 112 if (!*s++) 113 return (folded); 114 } 115 } 116 117 /* 118 * performs like strcasecmp, but also folds white space before comparing, 119 * and will handle UTF-8 comparisons (including case). Note that the 120 * application's locale must have been set to a UTF-8 locale for this 121 * to work properly. 
/*
 * slp_strcasecmp: case-insensitive string comparison that also ignores
 * differences in white space and can compare UTF-8 text.  The
 * application's locale must have been set to a UTF-8 locale for the
 * wide character stage to work properly.
 *
 * The comparison is tried in three stages, cheapest first:
 *   1. strcasecmp() on the strings exactly as given;
 *   2. strcasecmp() on copies with their white space folded;
 *   3. wscasecmp() on wide character conversions of the folded copies.
 *
 * Returns 0 as soon as a stage finds the strings equal.  Otherwise the
 * result of stage 3 is returned, or the non-zero stage 2 result if the
 * wide character conversion could not be carried out.  Returns -1 if
 * the folded copies could not be created.
 */
int slp_strcasecmp(const char *s1, const char *s2) {
	char *folded1 = NULL, *folded2 = NULL;
	wchar_t *wide1 = NULL, *wide2 = NULL;
	size_t n1, n2;
	int result;

	/* stage 1: the strings may already match as they are */
	if (strcasecmp(s1, s2) == 0)
		return (0);

	/* stage 2: compare again with white space folded */
	result = -1;
	folded1 = slp_fold_space(s1);
	folded2 = slp_fold_space(s2);
	if (folded1 == NULL || folded2 == NULL)
		goto done;

	result = strcasecmp(folded1, folded2);
	if (result == 0)
		goto done;

	/*
	 * stage 3: measure and allocate each wide string, then convert.
	 * Any failure from here on leaves the stage 2 result in place.
	 */
	n1 = mbstowcs(NULL, folded1, 0);
	if (n1 == (size_t)-1)
		goto done;

	wide1 = malloc(sizeof (*wide1) * (n1 + 1));
	if (wide1 == NULL) {
		slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory");
		goto done;
	}

	n2 = mbstowcs(NULL, folded2, 0);
	if (n2 == (size_t)-1)
		goto done;

	wide2 = malloc(sizeof (*wide2) * (n2 + 1));
	if (wide2 == NULL) {
		slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory");
		goto done;
	}

	/* the second conversion is skipped if the first one fails */
	if (mbstowcs(wide1, folded1, n1 + 1) != (size_t)-1 &&
	    mbstowcs(wide2, folded2, n2 + 1) != (size_t)-1)
		result = wscasecmp(wide1, wide2);

done:
	/* free(NULL) is a no-op, so unset pointers need no guard */
	free(folded1);
	free(folded2);
	free(wide1);
	free(wide2);
	return (result);
}