1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright (c) 1999 by Sun Microsystems, Inc. 24*7c478bd9Sstevel@tonic-gate * All rights reserved. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate /* 30*7c478bd9Sstevel@tonic-gate * UTF-8 encoded Unicode parsing routines. For efficiency, we convert 31*7c478bd9Sstevel@tonic-gate * to wide chars only when absolutely needed. The following interfaces 32*7c478bd9Sstevel@tonic-gate * are exported to libslp: 33*7c478bd9Sstevel@tonic-gate * 34*7c478bd9Sstevel@tonic-gate * slp_utf_strchr: same semantics as strchr, but handles UTF-8 strings 35*7c478bd9Sstevel@tonic-gate * slp_fold_space: folds white space around and in between works; 36*7c478bd9Sstevel@tonic-gate * handles UTF-8 strings 37*7c478bd9Sstevel@tonic-gate * slp_strcasecmp: same semantics as strcasecmp, but also folds white 38*7c478bd9Sstevel@tonic-gate * space and attempts locale-specific 39*7c478bd9Sstevel@tonic-gate * case-insensitive comparisons. 40*7c478bd9Sstevel@tonic-gate */ 41*7c478bd9Sstevel@tonic-gate 42*7c478bd9Sstevel@tonic-gate #include <stdio.h> 43*7c478bd9Sstevel@tonic-gate #include <string.h> 44*7c478bd9Sstevel@tonic-gate #include <widec.h> 45*7c478bd9Sstevel@tonic-gate #include <stdlib.h> 46*7c478bd9Sstevel@tonic-gate #include <syslog.h> 47*7c478bd9Sstevel@tonic-gate #include <slp-internal.h> 48*7c478bd9Sstevel@tonic-gate 49*7c478bd9Sstevel@tonic-gate /* 50*7c478bd9Sstevel@tonic-gate * Same semantics as strchr. 51*7c478bd9Sstevel@tonic-gate * Assumes that we start on a char boundry, and that c is a 7-bit 52*7c478bd9Sstevel@tonic-gate * ASCII char. 53*7c478bd9Sstevel@tonic-gate */ 54*7c478bd9Sstevel@tonic-gate char *slp_utf_strchr(const char *s, char c) { 55*7c478bd9Sstevel@tonic-gate int len; 56*7c478bd9Sstevel@tonic-gate char *p; 57*7c478bd9Sstevel@tonic-gate 58*7c478bd9Sstevel@tonic-gate for (p = (char *)s; *p; p += len) { 59*7c478bd9Sstevel@tonic-gate len = mblen(p, MB_CUR_MAX); 60*7c478bd9Sstevel@tonic-gate if (len == 1 && *p == c) 61*7c478bd9Sstevel@tonic-gate return (p); 62*7c478bd9Sstevel@tonic-gate } 63*7c478bd9Sstevel@tonic-gate return (NULL); 64*7c478bd9Sstevel@tonic-gate } 65*7c478bd9Sstevel@tonic-gate 66*7c478bd9Sstevel@tonic-gate /* 67*7c478bd9Sstevel@tonic-gate * folds white space around and in between words. 68*7c478bd9Sstevel@tonic-gate * " aa bb " becomes "aa bb". 69*7c478bd9Sstevel@tonic-gate * returns NULL if it couldn't allocate memory. The caller must free 70*7c478bd9Sstevel@tonic-gate * the result when done. 71*7c478bd9Sstevel@tonic-gate */ 72*7c478bd9Sstevel@tonic-gate static char *slp_fold_space(const char *s) { 73*7c478bd9Sstevel@tonic-gate int len; 74*7c478bd9Sstevel@tonic-gate char *folded, *f; 75*7c478bd9Sstevel@tonic-gate 76*7c478bd9Sstevel@tonic-gate if (!(folded = malloc(strlen(s) + 1))) { 77*7c478bd9Sstevel@tonic-gate slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory"); 78*7c478bd9Sstevel@tonic-gate return (NULL); 79*7c478bd9Sstevel@tonic-gate } 80*7c478bd9Sstevel@tonic-gate 81*7c478bd9Sstevel@tonic-gate f = folded; 82*7c478bd9Sstevel@tonic-gate for (;;) { 83*7c478bd9Sstevel@tonic-gate /* step 1: skip white space */ 84*7c478bd9Sstevel@tonic-gate for (; *s; s++) { 85*7c478bd9Sstevel@tonic-gate len = mblen(s, MB_CUR_MAX); 86*7c478bd9Sstevel@tonic-gate if (len != 1) 87*7c478bd9Sstevel@tonic-gate break; 88*7c478bd9Sstevel@tonic-gate if (!isspace(*s)) 89*7c478bd9Sstevel@tonic-gate break; 90*7c478bd9Sstevel@tonic-gate } 91*7c478bd9Sstevel@tonic-gate 92*7c478bd9Sstevel@tonic-gate if (!*s) { 93*7c478bd9Sstevel@tonic-gate /* end of string */ 94*7c478bd9Sstevel@tonic-gate *f = 0; 95*7c478bd9Sstevel@tonic-gate return (folded); 96*7c478bd9Sstevel@tonic-gate } 97*7c478bd9Sstevel@tonic-gate /* if we are in between words, keep one space */ 98*7c478bd9Sstevel@tonic-gate if (f != folded) 99*7c478bd9Sstevel@tonic-gate *f++ = ' '; 100*7c478bd9Sstevel@tonic-gate 101*7c478bd9Sstevel@tonic-gate /* step 2: copy into folded until we hit more white space */ 102*7c478bd9Sstevel@tonic-gate while (*s) { 103*7c478bd9Sstevel@tonic-gate int i; 104*7c478bd9Sstevel@tonic-gate len = mblen(s, MB_CUR_MAX); 105*7c478bd9Sstevel@tonic-gate if (len == 1 && isspace(*s)) 106*7c478bd9Sstevel@tonic-gate break; 107*7c478bd9Sstevel@tonic-gate 108*7c478bd9Sstevel@tonic-gate for (i = 0; i < len; i++) 109*7c478bd9Sstevel@tonic-gate *f++ = *s++; 110*7c478bd9Sstevel@tonic-gate } 111*7c478bd9Sstevel@tonic-gate *f = *s; 112*7c478bd9Sstevel@tonic-gate if (!*s++) 113*7c478bd9Sstevel@tonic-gate return (folded); 114*7c478bd9Sstevel@tonic-gate } 115*7c478bd9Sstevel@tonic-gate } 116*7c478bd9Sstevel@tonic-gate 117*7c478bd9Sstevel@tonic-gate /* 118*7c478bd9Sstevel@tonic-gate * performs like strcasecmp, but also folds white space before comparing, 119*7c478bd9Sstevel@tonic-gate * and will handle UTF-8 comparisons (including case). Note that the 120*7c478bd9Sstevel@tonic-gate * application's locale must have been set to a UTF-8 locale for this 121*7c478bd9Sstevel@tonic-gate * to work properly. 122*7c478bd9Sstevel@tonic-gate */ 123*7c478bd9Sstevel@tonic-gate int slp_strcasecmp(const char *s1, const char *s2) { 124*7c478bd9Sstevel@tonic-gate int diff = -1; 125*7c478bd9Sstevel@tonic-gate char *p1, *p2; 126*7c478bd9Sstevel@tonic-gate size_t wcslen1, wcslen2; 127*7c478bd9Sstevel@tonic-gate wchar_t *wcs1, *wcs2; 128*7c478bd9Sstevel@tonic-gate 129*7c478bd9Sstevel@tonic-gate p1 = p2 = NULL; wcs1 = wcs2 = NULL; 130*7c478bd9Sstevel@tonic-gate 131*7c478bd9Sstevel@tonic-gate /* optimization: try simple case first */ 132*7c478bd9Sstevel@tonic-gate if (strcasecmp(s1, s2) == 0) 133*7c478bd9Sstevel@tonic-gate return (0); 134*7c478bd9Sstevel@tonic-gate 135*7c478bd9Sstevel@tonic-gate /* fold white space, and try again */ 136*7c478bd9Sstevel@tonic-gate p1 = slp_fold_space(s1); 137*7c478bd9Sstevel@tonic-gate p2 = slp_fold_space(s2); 138*7c478bd9Sstevel@tonic-gate if (!p1 || !p2) 139*7c478bd9Sstevel@tonic-gate goto cleanup; 140*7c478bd9Sstevel@tonic-gate 141*7c478bd9Sstevel@tonic-gate if ((diff = strcasecmp(p1, p2)) == 0) 142*7c478bd9Sstevel@tonic-gate goto cleanup; 143*7c478bd9Sstevel@tonic-gate 144*7c478bd9Sstevel@tonic-gate /* 145*7c478bd9Sstevel@tonic-gate * try converting to wide char -- we must be in a locale which 146*7c478bd9Sstevel@tonic-gate * supports the UTF8 codeset for this to work. 147*7c478bd9Sstevel@tonic-gate */ 148*7c478bd9Sstevel@tonic-gate if ((wcslen1 = mbstowcs(NULL, p1, 0)) == (size_t)-1) 149*7c478bd9Sstevel@tonic-gate goto cleanup; 150*7c478bd9Sstevel@tonic-gate 151*7c478bd9Sstevel@tonic-gate if (!(wcs1 = malloc(sizeof (*wcs1) * (wcslen1 + 1)))) { 152*7c478bd9Sstevel@tonic-gate slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); 153*7c478bd9Sstevel@tonic-gate goto cleanup; 154*7c478bd9Sstevel@tonic-gate } 155*7c478bd9Sstevel@tonic-gate 156*7c478bd9Sstevel@tonic-gate if ((wcslen2 = mbstowcs(NULL, p2, 0)) == (size_t)-1) 157*7c478bd9Sstevel@tonic-gate goto cleanup; 158*7c478bd9Sstevel@tonic-gate 159*7c478bd9Sstevel@tonic-gate if (!(wcs2 = malloc(sizeof (*wcs2) * (wcslen2 + 1)))) { 160*7c478bd9Sstevel@tonic-gate slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); 161*7c478bd9Sstevel@tonic-gate goto cleanup; 162*7c478bd9Sstevel@tonic-gate } 163*7c478bd9Sstevel@tonic-gate if (mbstowcs(wcs1, p1, wcslen1 + 1) == (size_t)-1) 164*7c478bd9Sstevel@tonic-gate goto cleanup; 165*7c478bd9Sstevel@tonic-gate if (mbstowcs(wcs2, p2, wcslen2 + 1) == (size_t)-1) 166*7c478bd9Sstevel@tonic-gate goto cleanup; 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate diff = wscasecmp(wcs1, wcs2); 169*7c478bd9Sstevel@tonic-gate 170*7c478bd9Sstevel@tonic-gate cleanup: 171*7c478bd9Sstevel@tonic-gate if (p1) free(p1); 172*7c478bd9Sstevel@tonic-gate if (p2) free(p2); 173*7c478bd9Sstevel@tonic-gate if (wcs1) free(wcs1); 174*7c478bd9Sstevel@tonic-gate if (wcs2) free(wcs2); 175*7c478bd9Sstevel@tonic-gate return (diff); 176*7c478bd9Sstevel@tonic-gate } 177